aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--plugins/gzip.lua185
1 files changed, 185 insertions, 0 deletions
diff --git a/plugins/gzip.lua b/plugins/gzip.lua
new file mode 100644
index 0000000..8cc93a1
--- /dev/null
+++ b/plugins/gzip.lua
@@ -0,0 +1,185 @@
+--
+-- gzip.lua: GZIP File Format
+--
+-- Copyright (c) 2017, Přemysl Janouch <p.janouch@gmail.com>
+--
+-- Permission to use, copy, modify, and/or distribute this software for any
+-- purpose with or without fee is hereby granted, provided that the above
+-- copyright notice and this permission notice appear in all copies.
+--
+-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+-- SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+-- OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+-- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+--
+
+local detect = function (c)
+ return c:read (2) == "\x1f\x8b"
+end
+
+local function latin1_to_utf8 (s)
+ local u = ""
+ for _, c in ipairs (table.pack (s:byte (1, #s))) do
+ if c < 0x80 then
+ u = u .. string.char (c)
+ else
+ u = u .. string.char (0xc0 | c >> 6, 0x80 | c & 0x3f)
+ end
+ end
+ return u
+end
+
+-- Everything here is based on RFC 1952 and some bits of dictzip
+local crc32_table = function ()
+ local table = {}
+ for n = 0, 255 do
+ local c = n
+ for k = 0, 7 do
+ if c & 1 ~= 0 then
+ c = 0xedb88320 ~ (c >> 1)
+ else
+ c = c >> 1
+ end
+ end
+ table[n] = c
+ end
+ return table
+end
+
+local crc32 = function (s)
+ local table, c = crc32_table (), 0xffffffff
+ for n = 1, #s do c = table[(c ~ s:byte (n)) & 0xff] ~ (c >> 8) end
+ return c ~ 0xffffffff
+end
+
+local os_table = {
+ [0] = "FAT filesystem",
+ [1] = "Amiga",
+ [2] = "VMS",
+ [3] = "Unix",
+ [4] = "VM/CMS",
+ [5] = "Atari TOS",
+ [6] = "HPFS filesystem",
+ [7] = "Macintosh",
+ [8] = "Z-System",
+ [9] = "CP/M",
+ [10] = "TOPS-20",
+ [11] = "NTFS filesystem",
+ [12] = "QDOS",
+ [13] = "Acord RISCOS",
+ [255] = "unknown"
+}
+
+local decode = function (c)
+ if not detect (c ()) then error ("not a GZIP file") end
+ local start = c.position
+
+ c.endianity = 'le'
+ c:u16 ("GZIP magic")
+
+ local deflate
+ c:u8 ("compression method: %s", function (u8)
+ if u8 ~= 8 then return "unknown: %d", u8 end
+ deflate = true
+ return "deflate"
+ end)
+
+ local text, hcrc, extra, name, comment
+ c:u8 ("flags: %s", function (u8)
+ text = u8 & 1 == 1
+ hcrc = (u8 >> 1) & 1 == 1
+ extra = (u8 >> 2) & 1 == 1
+ name = (u8 >> 3) & 1 == 1
+ comment = (u8 >> 4) & 1 == 1
+
+ local flags = ""
+ if text then flags = flags .. ", text" end
+ if hcrc then flags = flags .. ", header CRC" end
+ if extra then flags = flags .. ", extra" end
+ if name then flags = flags .. ", filename" end
+ if comment then flags = flags .. ", comment" end
+
+ if flags == "" then
+ return "none"
+ else
+ return "%s", flags:sub (3)
+ end
+ end)
+
+ c:u32 ("modified time: %s", function (u32)
+ if u32 == 0 then return "none" end
+ return os.date ("!%F %T", u32)
+ end)
+ c:u8 ("extra flags: %s", function (u8)
+ if deflate then
+ if u8 == 2 then return "slowest (%d)", u8 end
+ if u8 == 4 then return "fastest (%d)", u8 end
+ end
+ return "unknown: %d", u8
+ end)
+ c:u8 ("OS: %s", function (u8)
+ os = os_table[u8]
+ if os then return os end
+ return "unknown: %d", u8
+ end)
+
+ local extra_table = {}
+ if extra then
+ local len = c:u16 ("extra field length: %d")
+ c (c.position, c.position + len - 1):mark ("extra field")
+
+ -- This will handle even overflowing subfields
+ while len >= 4 do
+ local p, sid = c.position, c:read (2)
+ c (p, c.position - 1):mark ("subfield ID: %s", sid)
+ local sid_len = c:u16 ("subfield length: %d")
+
+ local subfield = c (c.position, c.position + sid_len - 1)
+ subfield:mark ("subfield data")
+ extra_table[sid] = subfield
+ c.position = c.position + sid_len
+ len = len - 4 - sid_len
+ end
+ c.position = c.position + len
+ end
+
+ if name then
+ c:cstring ("filename: %s", latin1_to_utf8)
+ end
+ if comment then
+ c:cstring ("comment: %s", latin1_to_utf8)
+ end
+ if hcrc then
+ c:u16 ("CRC-16: %s", function (u16)
+ local crc = 0xffff & crc32 (c (start):read (c.position - 1))
+ if crc == u16 then check = "ok" else check = "failed" end
+ return "%#06x (%s)", u16, check
+ end)
+ end
+
+ -- Compressed data follows immediately
+ -- We can jump through it without decompression in dictzip v1 archives
+ local ra = extra_table["RA"]
+ if not ra then return end
+ local ra_ver = ra:u16 ("RA version: %d")
+ if ra_ver ~= 1 then return end
+
+ local ra_chunk = ra:u16 ("chunk length: %d")
+ local ra_count = ra:u16 ("chunk count: %d")
+ for i = 1, ra_count do
+ local len = ra:u16 ("chunk " .. i .. " compressed length: %d")
+ c (c.position, c.position + len - 1):mark ("chunk " .. i)
+ c.position = c.position + len
+ end
+ -- 1 final, 01 static, 0000000 end of block, padding discarded
+ -- This is the kind of block that dictzip finalizes archives with
+ if c:u16 () & 0x03ff == 0x0003 then
+ c:u32 ("CRC-32: %#010x")
+ c:u32 ("input size: %d")
+ end
+end
+
+hex.register { type="gzip", detect=detect, decode=decode }