//
// info.c: acquire information about JPEG/TIFF/BMFF/WebP files in JSON format
//
// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//

#include "info.h"

#include <jv.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// --- ISO/IEC base media file format ------------------------------------------
// ISO/IEC 14496-12:2015(E), used to be publicly available, now there's only:
// https://mpeg.chiariglione.org/standards/mpeg-4/iso-base-media-file-format/text-isoiec-14496-12-5th-edition
// but people have managed to archive the final version as well:
// https://b.goeswhere.com/ISO_IEC_14496-12_2015.pdf
//
// ISO/IEC 23008-12:2017 Information technology -
// High efficiency coding and media delivery in heterogeneous environments -
// Part 12: Image File Format + Cor 1:2020 Technical Corrigendum 1
// https://standards.iso.org/ittf/PubliclyAvailableStandards/

static jv
parse_bmff_box(jv o, const char *type, const uint8_t *data, size_t len)
{
	// TODO(p): Parse out "uuid"'s uint8_t[16] initial field, present as hex.
	// TODO(p): Parse out "ftyp" contents: 14496-12:2015 4.3
	// TODO(p): Parse out other important boxes: 14496-12:2015 8+
	return add_to_subarray(o, "boxes", jv_string(type));
}

static bool
detect_bmff(const uint8_t *p, size_t len)
{
	// 4.2 Object Structure--this box need not be present, nor at the beginning
	// TODO(p): What does `aligned(8)` mean? It's probably in bits.
	return len >= 8 && !memcmp(p + 4, "ftyp", 4);
}

static jv
parse_bmff(jv o, const uint8_t *p, size_t len)
{
	if (!detect_bmff(p, len))
		return add_error(o, "not BMFF at all or unsupported");

	const uint8_t *end = p + len;
	while (p < end) {
		if (end - p < 8) {
			o = add_warning(o, "box framing mismatch");
			break;
		}

		char type[5] = "";
		memcpy(type, p + 4, 4);

		uint64_t box_size = u32be(p);
		const uint8_t *data = p + 8;
		if (box_size == 1) {
			if (end - p < 16) {
				o = add_warning(o, "unexpected EOF");
				break;
			}
			box_size = u64be(data);
			data += 8;
		} else if (!box_size)
			box_size = end - p;

		if (box_size > (uint64_t) (end - p)) {
			o = add_warning(o, "unexpected EOF");
			break;
		}

		size_t data_len = box_size - (data - p);
		o = parse_bmff_box(o, type, data, data_len);
		p += box_size;
	}
	return o;
}

// --- WebP --------------------------------------------------------------------
// libwebp won't let us simply iterate over all chunks, so handroll it.
//
// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt
// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt
// https://datatracker.ietf.org/doc/html/rfc6386
//
// Pretty versions, hopefully not outdated:
// https://developers.google.com/speed/webp/docs/riff_container
// https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification

static bool
detect_webp(const uint8_t *p, size_t len)
{
	return len >= 12 && !memcmp(p, "RIFF", 4) && !memcmp(p + 8, "WEBP", 4);
}

static jv
parse_webp_vp8(jv o, const uint8_t *p, size_t len)
{
	if (len < 10 || (p[0] & 1) != 0 /* key frame */ ||
		p[3] != 0x9d || p[4] != 0x01 || p[5] != 0x2a) {
		return add_warning(o, "invalid VP8 chunk");
	}

	o = jv_set(o, jv_string("width"), jv_number(u16le(p + 6) & 0x3fff));
	o = jv_set(o, jv_string("height"), jv_number(u16le(p + 8) & 0x3fff));
	return o;
}

static jv
parse_webp_vp8l(jv o, const uint8_t *p, size_t len)
{
	if (len < 5 || p[0] != 0x2f)
		return add_warning(o, "invalid VP8L chunk");

	// Reading LSB-first from a little endian value means reading in order.
	uint32_t header = u32le(p + 1);
	o = jv_set(o, jv_string("width"), jv_number((header & 0x3fff) + 1));
	header >>= 14;
	o = jv_set(o, jv_string("height"), jv_number((header & 0x3fff) + 1));
	header >>= 14;
	o = jv_set(o, jv_string("alpha_is_used"), jv_bool(header & 1));
	return o;
}

static jv
parse_webp_vp8x(jv o, const uint8_t *p, size_t len)
{
	if (len < 10)
		return add_warning(o, "invalid VP8X chunk");

	// Most of the fields in this chunk are duplicate or inferrable.
	// Probably not worth decoding or verifying.
	// TODO(p): For animations, we need to use the width and height from here.
	uint8_t flags = p[0];
	o = jv_set(o, jv_string("animation"), jv_bool((flags >> 1) & 1));
	return o;
}

static jv
parse_webp(jv o, const uint8_t *p, size_t len)
{
	if (!detect_webp(p, len))
		return add_error(o, "not a WEBP file");

	// TODO(p): This can still be parseable.
	// TODO(p): Warn on trailing data.
	uint32_t size = u32le(p + 4);
	if (8 + size < len)
		return add_error(o, "truncated file");

	const uint8_t *end = p + 8 + size;
	p += 12;

	jv chunks = jv_array();
	while (p < end) {
		if (end - p < 8) {
			o = add_warning(o, "framing mismatch");
			printf("%ld", end - p);
			break;
		}

		uint32_t chunk_size = u32le(p + 4);
		uint32_t chunk_advance = (chunk_size + 1) & ~1;
		if (p + 8 + chunk_advance > end) {
			o = add_warning(o, "runaway chunk payload");
			break;
		}

		char fourcc[5] = "";
		memcpy(fourcc, p, 4);
		chunks = jv_array_append(chunks, jv_string(fourcc));
		p += 8;

		// TODO(p): Decode more chunks.
		if (!strcmp(fourcc, "VP8 "))
			o = parse_webp_vp8(o, p, chunk_size);
		if (!strcmp(fourcc, "VP8L"))
			o = parse_webp_vp8l(o, p, chunk_size);
		if (!strcmp(fourcc, "VP8X"))
			o = parse_webp_vp8x(o, p, chunk_size);
		if (!strcmp(fourcc, "EXIF"))
			o = parse_exif(o, p, chunk_size);
		if (!strcmp(fourcc, "ICCP"))
			o = parse_icc(o, p, chunk_size);
		p += chunk_advance;
	}
	return jv_set(o, jv_string("chunks"), chunks);
}

// --- I/O ---------------------------------------------------------------------

static struct {
	const char *name;
	bool (*detect) (const uint8_t *, size_t);
	jv (*parse) (jv, const uint8_t *, size_t);
} formats[] = {
	{"JPEG", detect_jpeg, parse_jpeg},
	{"TIFF", detect_tiff, parse_tiff},
	{"BMFF", detect_bmff, parse_bmff},
	{"WebP", detect_webp, parse_webp},
};

static jv
parse_any(jv o, const uint8_t *p, size_t len)
{
	// TODO(p): Also see if the file extension is appropriate.
	for (size_t i = 0; i < sizeof formats / sizeof *formats; i++) {
		if (!formats[i].detect(p, len))
			continue;
		if (getenv("INFO_IDENTIFY"))
			o = jv_set(o, jv_string("format"), jv_string(formats[i].name));
		return formats[i].parse(o, p, len);
	}
	return add_error(o, "unsupported file format");
}

static jv
do_file(const char *filename, jv o)
{
	const char *err = NULL;
	FILE *fp = fopen(filename, "rb");
	if (!fp) {
		err = strerror(errno);
		goto error;
	}

	uint8_t *data = NULL, buf[256 << 10];
	size_t n, len = 0;
	while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) {
		data = realloc(data, len + n);
		memcpy(data + len, buf, n);
		len += n;
	}
	if (ferror(fp)) {
		err = strerror(errno);
		goto error_read;
	}

#if 0
	// Not sure if I want to ensure their existence...
	o = jv_object_set(o, jv_string("info"), jv_array());
	o = jv_object_set(o, jv_string("warnings"), jv_array());
#endif

	o = parse_any(o, data, len);
error_read:
	fclose(fp);
	free(data);
error:
	if (err)
		o = add_error(o, err);
	return o;
}

int
main(int argc, char *argv[])
{
	// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
	// Usage: find . -print0 | xargs -0 ./info
	for (int i = 1; i < argc; i++) {
		const char *filename = argv[i];

		jv o = jv_object();
		o = jv_object_set(o, jv_string("filename"), jv_string(filename));
		o = do_file(filename, o);
		jv_dumpf(o, stdout, 0 /* JV_PRINT_SORTED would discard information. */);
		fputc('\n', stdout);
	}
	return 0;
}