aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2020-08-12 07:29:57 +0200
committerPřemysl Eric Janouch <p@janouch.name>2020-08-12 07:53:21 +0200
commitceea7dca2f4ff356e136b9a72611b9b66d402eaa (patch)
tree3e5c51ac871a32833608e5a19a8b1a7d06fd931a
parent486cafa6b447d7af296411f7c50d4c078f3eac34 (diff)
downloadpdf-simple-sign-c++-library.tar.gz
pdf-simple-sign-c++-library.tar.xz
pdf-simple-sign-c++-library.zip
C++: split out a library with a trivial interfaceorigin/c++-libraryc++-library
Closes #2.
-rw-r--r--meson.build9
-rw-r--r--pdf-simple-sign.cpp895
-rw-r--r--pdf-simple-sign.h28
-rw-r--r--pdf.cpp906
4 files changed, 950 insertions, 888 deletions
diff --git a/meson.build b/meson.build
index 0446cc3..6544cdb 100644
--- a/meson.build
+++ b/meson.build
@@ -1,5 +1,12 @@
project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'])
cryptodep = dependency('libcrypto')
-executable('pdf-simple-sign', 'pdf-simple-sign.cpp',
+
+executable('pdf-simple-sign', 'pdf-simple-sign.cpp', 'pdf.cpp',
+ install : true,
+ dependencies : cryptodep)
+
+install_headers('pdf-simple-sign.h')
+library('pdf-simple-sign', 'pdf.cpp',
+ soversion : 0,
install : true,
dependencies : cryptodep)
diff --git a/pdf-simple-sign.cpp b/pdf-simple-sign.cpp
index 818270e..8f3fe3f 100644
--- a/pdf-simple-sign.cpp
+++ b/pdf-simple-sign.cpp
@@ -2,7 +2,7 @@
//
// pdf-simple-sign: simple PDF signer
//
-// Copyright (c) 2017, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
@@ -17,899 +17,22 @@
//
#include <cstdio>
-#include <cmath>
-#undef NDEBUG
-#include <cassert>
-
-#include <vector>
-#include <map>
-#include <regex>
-#include <memory>
-#include <set>
-
-#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
-#error Need libstdc++ >= 4.9 for <regex>
-#endif
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+#include <cstdarg>
#include <unistd.h>
#include <getopt.h>
-#include <openssl/err.h>
-#include <openssl/x509v3.h>
-#include <openssl/pkcs12.h>
-
-// -------------------------------------------------------------------------------------------------
-
-using uint = unsigned int;
-
-static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
- std::string res;
- if (v.empty())
- return res;
- for (const auto& s : v)
- res += s + delim;
- return res.substr(0, res.length() - delim.length());
-}
-
-template<typename... Args>
-std::string ssprintf(const std::string& format, Args... args) {
- size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
- std::unique_ptr<char[]> buf(new char[size]);
- std::snprintf(buf.get(), size, format.c_str(), args...);
- return std::string(buf.get(), buf.get() + size - 1);
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// PDF token/object thingy. Objects may be composed either from one or a sequence of tokens.
-/// The PDF Reference doesn't actually speak of tokens.
-struct pdf_object {
- enum type {
- END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
- // Simple tokens
- B_ARRAY, E_ARRAY, B_DICT, E_DICT,
- // Higher-level objects
- ARRAY, DICT, OBJECT, REFERENCE,
- } type = END;
-
- std::string string; ///< END (error message), COMMENT/KEYWORD/NAME/STRING
- double number = 0.; ///< BOOL, NUMERIC
- std::vector<pdf_object> array; ///< ARRAY, OBJECT
- std::map<std::string, pdf_object> dict; ///< DICT, in the future also STREAM
- uint n = 0, generation = 0; ///< OBJECT, REFERENCE
-
- pdf_object(enum type type = END) : type(type) {}
- pdf_object(enum type type, double v) : type(type), number(v) {}
- pdf_object(enum type type, const std::string& v) : type(type), string(v) {}
- pdf_object(enum type type, uint n, uint g) : type(type), n(n), generation(g) {}
- pdf_object(const std::vector<pdf_object>& array) : type(ARRAY), array(array) {}
- pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}
-
- pdf_object(const pdf_object&) = default;
- pdf_object(pdf_object&&) = default;
- pdf_object& operator=(const pdf_object&) = default;
- pdf_object& operator=(pdf_object&&) = default;
-
- /// Return whether this is a number without a fractional part
- bool is_integer() const {
- double tmp;
- return type == NUMERIC && std::modf(number, &tmp) == 0.;
- }
-};
-
-/// Basic lexical analyser for the Portable Document Format, giving limited error information
-struct pdf_lexer {
- const unsigned char* p;
- pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
-
- static constexpr const char* oct_alphabet = "01234567";
- static constexpr const char* dec_alphabet = "0123456789";
- static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
- static constexpr const char* whitespace = "\t\n\f\r ";
- static constexpr const char* delimiters = "()<>[]{}/%";
-
- bool eat_newline(int ch) {
- if (ch == '\r') {
- if (*p == '\n') p++;
- return true;
- }
- return ch == '\n';
- }
-
- pdf_object string() {
- std::string value;
- int parens = 1;
- while (1) {
- if (!*p) return {pdf_object::END, "unexpected end of string"};
- auto ch = *p++;
- if (eat_newline(ch)) ch = '\n';
- else if (ch == '(') { parens++; }
- else if (ch == ')') { if (!--parens) break; }
- else if (ch == '\\') {
- if (!*p) return {pdf_object::END, "unexpected end of string"};
- switch ((ch = *p++)) {
- case 'n': ch = '\n'; break;
- case 'r': ch = '\r'; break;
- case 't': ch = '\t'; break;
- case 'b': ch = '\b'; break;
- case 'f': ch = '\f'; break;
- default:
- if (eat_newline(ch))
- continue;
- std::string octal;
- if (ch && strchr(oct_alphabet, ch)) {
- octal += ch;
- if (*p && strchr(oct_alphabet, *p)) octal += *p++;
- if (*p && strchr(oct_alphabet, *p)) octal += *p++;
- ch = std::stoi(octal, nullptr, 8);
- }
- }
- }
- value += ch;
- }
- return {pdf_object::STRING, value};
- }
-
- pdf_object string_hex() {
- std::string value, buf;
- while (*p != '>') {
- if (!*p) return {pdf_object::END, "unexpected end of hex string"};
- if (!strchr(hex_alphabet, *p))
- return {pdf_object::END, "invalid hex string"};
- buf += *p++;
- if (buf.size() == 2) {
- value += char(std::stoi(buf, nullptr, 16));
- buf.clear();
- }
- }
- p++;
- if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
- return {pdf_object::STRING, value};
- }
-
- pdf_object name() {
- std::string value;
- while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
- auto ch = *p++;
- if (ch == '#') {
- std::string hexa;
- if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
- if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
- if (hexa.size() != 2)
- return {pdf_object::END, "invalid name hexa escape"};
- ch = char(std::stoi(hexa, nullptr, 16));
- }
- value += ch;
- }
- if (value.empty()) return {pdf_object::END, "unexpected end of name"};
- return {pdf_object::NAME, value};
- }
-
- pdf_object comment() {
- std::string value;
- while (*p && *p != '\r' && *p != '\n')
- value += *p++;
- return {pdf_object::COMMENT, value};
- }
-
- // XXX maybe invalid numbers should rather be interpreted as keywords
- pdf_object number() {
- std::string value;
- if (*p == '-')
- value += *p++;
- bool real = false, digits = false;
- while (*p) {
- if (strchr(dec_alphabet, *p))
- digits = true;
- else if (*p == '.' && !real)
- real = true;
- else
- break;
- value += *p++;
- }
- if (!digits) return {pdf_object::END, "invalid number"};
- return {pdf_object::NUMERIC, std::stod(value, nullptr)};
- }
-
- pdf_object next() {
- if (!*p)
- return {pdf_object::END};
- if (strchr("-0123456789.", *p))
- return number();
-
- // {} end up being keywords, we might want to error out on those
- std::string value;
- while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
- value += *p++;
- if (!value.empty()) {
- if (value == "null") return {pdf_object::NIL};
- if (value == "true") return {pdf_object::BOOL, 1};
- if (value == "false") return {pdf_object::BOOL, 0};
- return {pdf_object::KEYWORD, value};
- }
-
- switch (char ch = *p++) {
- case '/': return name();
- case '%': return comment();
- case '(': return string();
- case '[': return {pdf_object::B_ARRAY};
- case ']': return {pdf_object::E_ARRAY};
- case '<':
- if (*p++ == '<')
- return {pdf_object::B_DICT};
- p--;
- return string_hex();
- case '>':
- if (*p++ == '>')
- return {pdf_object::E_DICT};
- p--;
- return {pdf_object::END, "unexpected '>'"};
- default:
- if (eat_newline(ch))
- return {pdf_object::NL};
- if (strchr(whitespace, ch))
- return next();
- return {pdf_object::END, "unexpected input"};
- }
- }
-};
-
-// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
-static std::string pdf_serialize(const pdf_object& o) {
- switch (o.type) {
- case pdf_object::NL: return "\n";
- case pdf_object::NIL: return "null";
- case pdf_object::BOOL: return o.number ? "true" : "false";
- case pdf_object::NUMERIC:
- {
- if (o.is_integer()) return std::to_string((long long) o.number);
- return std::to_string(o.number);
- }
- case pdf_object::KEYWORD: return o.string;
- case pdf_object::NAME:
- {
- std::string escaped = "/";
- for (char c : o.string) {
- if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
- escaped += ssprintf("#%02x", c);
- else
- escaped += c;
- }
- return escaped;
- }
- case pdf_object::STRING:
- {
- std::string escaped;
- for (char c : o.string) {
- if (c == '\\' || c == '(' || c == ')')
- escaped += '\\';
- escaped += c;
- }
- return "(" + escaped + ")";
- }
- case pdf_object::B_ARRAY: return "[";
- case pdf_object::E_ARRAY: return "]";
- case pdf_object::B_DICT: return "<<";
- case pdf_object::E_DICT: return ">>";
- case pdf_object::ARRAY:
- {
- std::vector<std::string> v;
- for (const auto& i : o.array)
- v.push_back(pdf_serialize(i));
- return "[ " + concatenate(v, " ") + " ]";
- }
- case pdf_object::DICT:
- {
- std::string s;
- for (const auto i : o.dict)
- // FIXME the key is also supposed to be escaped by pdf_serialize()
- s += " /" + i.first + " " + pdf_serialize(i.second);
- return "<<" + s + " >>";
- }
- case pdf_object::OBJECT:
- return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
- case pdf_object::REFERENCE:
- return ssprintf("%u %u R", o.n, o.generation);
- default:
- assert(!"unsupported token for serialization");
- }
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// Utility class to help read and possibly incrementally update PDF files
-class pdf_updater {
- struct ref {
- size_t offset = 0; ///< File offset or N of the next free entry
- uint generation = 0; ///< Object generation
- bool free = true; ///< Whether this N has been deleted
- };
-
- std::vector<ref> xref; ///< Cross-reference table
- size_t xref_size = 0; ///< Current cross-reference table size, correlated to xref.size()
- std::set<uint> updated; ///< List of updated objects
-
- pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
- pdf_object parse_R(std::vector<pdf_object>& stack) const;
- pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
- std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
-
-public:
- /// The new trailer dictionary to be written, initialized with the old one
- std::map<std::string, pdf_object> trailer;
-
- std::string& document;
- pdf_updater(std::string& document) : document(document) {}
-
- /// Build the cross-reference table and prepare a new trailer dictionary
- std::string initialize();
- /// Retrieve an object by its number and generation -- may return NIL or END with an error
- pdf_object get(uint n, uint generation) const;
- /// Allocate a new object number
- uint allocate();
- /// Append an updated object to the end of the document
- void update(uint n, std::function<void()> fill);
- /// Write an updated cross-reference table and trailer
- void flush_updates();
-};
-
-// -------------------------------------------------------------------------------------------------
-
-/// If the object is an error, forward its message, otherwise return err.
-static std::string pdf_error(const pdf_object& o, const char* err) {
- if (o.type != pdf_object::END || o.string.empty()) return err;
- return o.string;
-}
-
-pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
- if (stack.size() < 2)
- return {pdf_object::END, "missing object ID pair"};
-
- auto g = stack.back(); stack.pop_back();
- auto n = stack.back(); stack.pop_back();
- if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
- || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
- return {pdf_object::END, "invalid object ID pair"};
-
- pdf_object obj{pdf_object::OBJECT};
- obj.n = n.number;
- obj.generation = g.number;
-
- while (1) {
- auto object = parse(lex, obj.array);
- if (object.type == pdf_object::END)
- return {pdf_object::END, pdf_error(object, "object doesn't end")};
- if (object.type == pdf_object::KEYWORD && object.string == "endobj")
- break;
- obj.array.push_back(std::move(object));
- }
- return obj;
-}
-
-pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
- if (stack.size() < 2)
- return {pdf_object::END, "missing reference ID pair"};
-
- auto g = stack.back(); stack.pop_back();
- auto n = stack.back(); stack.pop_back();
- if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
- || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
- return {pdf_object::END, "invalid reference ID pair"};
-
- pdf_object ref{pdf_object::REFERENCE};
- ref.n = n.number;
- ref.generation = g.number;
- return ref;
-}
-
-/// Read an object at the lexer's position. Not a strict parser.
-pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
- auto token = lex.next();
- switch (token.type) {
- case pdf_object::NL:
- case pdf_object::COMMENT:
- // These are not important to parsing, not even for this procedure's needs
- return parse(lex, stack);
- case pdf_object::B_ARRAY:
- {
- std::vector<pdf_object> array;
- while (1) {
- auto object = parse(lex, array);
- if (object.type == pdf_object::END)
- return {pdf_object::END, pdf_error(object, "array doesn't end")};
- if (object.type == pdf_object::E_ARRAY)
- break;
- array.push_back(std::move(object));
- }
- return array;
- }
- case pdf_object::B_DICT:
- {
- std::vector<pdf_object> array;
- while (1) {
- auto object = parse(lex, array);
- if (object.type == pdf_object::END)
- return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
- if (object.type == pdf_object::E_DICT)
- break;
- array.push_back(std::move(object));
- }
- if (array.size() % 2)
- return {pdf_object::END, "unbalanced dictionary"};
- std::map<std::string, pdf_object> dict;
- for (size_t i = 0; i < array.size(); i += 2) {
- if (array[i].type != pdf_object::NAME)
- return {pdf_object::END, "invalid dictionary key type"};
- dict.insert({array[i].string, std::move(array[i + 1])});
- }
- return dict;
- }
- case pdf_object::KEYWORD:
- // Appears in the document body, typically needs to access the cross-reference table
- // TODO use the xref to read /Length etc. once we actually need to read such objects;
- // presumably streams can use the pdf_object::string member
- if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
- if (token.string == "obj") return parse_obj(lex, stack);
- if (token.string == "R") return parse_R(stack);
- return token;
- default:
- return token;
- }
-}
-
-std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
- std::vector<pdf_object> throwaway_stack;
- {
- auto keyword = parse(lex, throwaway_stack);
- if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
- return "invalid xref table";
- }
- while (1) {
- auto object = parse(lex, throwaway_stack);
- if (object.type == pdf_object::END)
- return pdf_error(object, "unexpected EOF while looking for the trailer");
- if (object.type == pdf_object::KEYWORD && object.string == "trailer")
- break;
-
- auto second = parse(lex, throwaway_stack);
- if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
- || !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
- return "invalid xref section header";
-
- const size_t start = object.number;
- const size_t count = second.number;
- for (size_t i = 0; i < count; i++) {
- auto off = parse(lex, throwaway_stack);
- auto gen = parse(lex, throwaway_stack);
- auto key = parse(lex, throwaway_stack);
- if (!off.is_integer() || off.number < 0 || off.number > document.length()
- || !gen.is_integer() || gen.number < 0 || gen.number > 65535
- || key.type != pdf_object::KEYWORD)
- return "invalid xref entry";
-
- bool free = true;
- if (key.string == "n")
- free = false;
- else if (key.string != "f")
- return "invalid xref entry";
-
- auto n = start + i;
- if (loaded_entries.count(n))
- continue;
- if (n >= xref.size())
- xref.resize(n + 1);
- loaded_entries.insert(n);
-
- auto& ref = xref[n];
- ref.generation = gen.number;
- ref.offset = off.number;
- ref.free = free;
- }
- }
- return "";
-}
-
-// -------------------------------------------------------------------------------------------------
-
-std::string pdf_updater::initialize() {
- // We only need to look for startxref roughly within the last kibibyte of the document
- static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
- std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
-
- std::smatch m;
- if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
- return "cannot find startxref";
-
- size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
- std::set<size_t> loaded_xrefs;
- std::set<uint> loaded_entries;
-
- std::vector<pdf_object> throwaway_stack;
- while (1) {
- if (loaded_xrefs.count(xref_offset))
- return "circular xref offsets";
- if (xref_offset >= document.length())
- return "invalid xref offset";
-
- pdf_lexer lex(document.c_str() + xref_offset);
- auto err = load_xref(lex, loaded_entries);
- if (!err.empty()) return err;
-
- auto trailer = parse(lex, throwaway_stack);
- if (trailer.type != pdf_object::DICT)
- return pdf_error(trailer, "invalid trailer dictionary");
- if (loaded_xrefs.empty())
- this->trailer = trailer.dict;
- loaded_xrefs.insert(xref_offset);
-
- const auto prev_offset = trailer.dict.find("Prev");
- if (prev_offset == trailer.dict.end())
- break;
- // FIXME we don't check for size_t over or underflow
- if (!prev_offset->second.is_integer())
- return "invalid Prev offset";
- xref_offset = prev_offset->second.number;
- }
-
- trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
- const auto last_size = trailer.find("Size");
- if (last_size == trailer.end() || !last_size->second.is_integer() ||
- last_size->second.number <= 0)
- return "invalid or missing cross-reference table Size";
-
- xref_size = last_size->second.number;
- return "";
-}
-
-pdf_object pdf_updater::get(uint n, uint generation) const {
- if (n >= xref_size)
- return {pdf_object::NIL};
-
- const auto& ref = xref[n];
- if (ref.free || ref.generation != generation || ref.offset >= document.length())
- return {pdf_object::NIL};
-
- pdf_lexer lex(document.c_str() + ref.offset);
- std::vector<pdf_object> stack;
- while (1) {
- auto object = parse(lex, stack);
- if (object.type == pdf_object::END)
- return object;
- if (object.type != pdf_object::OBJECT)
- stack.push_back(std::move(object));
- else if (object.n != n || object.generation != generation)
- return {pdf_object::END, "object mismatch"};
- else
- return std::move(object.array.at(0));
- }
-}
-
-uint pdf_updater::allocate() {
- assert(xref_size < UINT_MAX);
-
- auto n = xref_size++;
- if (xref.size() < xref_size)
- xref.resize(xref_size);
-
- // We don't make sure it gets a subsection in the update yet because we
- // make no attempts at fixing the linked list of free items either
- return n;
-}
-
-void pdf_updater::update(uint n, std::function<void()> fill) {
- auto& ref = xref.at(n);
- ref.offset = document.length() + 1;
- ref.free = false;
- updated.insert(n);
-
- document += ssprintf("\n%u %u obj\n", n, ref.generation);
- // Separately so that the callback can use document.length() to get the current offset
- fill();
- document += "\nendobj";
-}
-
-void pdf_updater::flush_updates() {
- std::map<uint, size_t> groups;
- for (auto i = updated.cbegin(); i != updated.cend(); ) {
- size_t start = *i, count = 1;
- while (++i != updated.cend() && *i == start + count)
- count++;
- groups[start] = count;
- }
-
- // Taking literally "Each cross-reference section begins with a line containing the keyword xref.
- // Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
- if (groups.empty())
- groups[0] = 0;
-
- auto startxref = document.length() + 1;
- document += "\nxref\n";
- for (const auto& g : groups) {
- document += ssprintf("%u %zu\n", g.first, g.second);
- for (size_t i = 0; i < g.second; i++) {
- auto& ref = xref[g.first + i];
- document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
- }
- }
-
- trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
- document += "trailer\n" + pdf_serialize(trailer)
- + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// Make a PDF object representing the given point in time
-static pdf_object pdf_date(time_t timestamp) {
- struct tm parts;
- assert(localtime_r(&timestamp, &parts));
-
- char buf[64];
- assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
-
- std::string offset = "Z";
- auto offset_min = parts.tm_gmtoff / 60;
- if (parts.tm_gmtoff < 0)
- offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
- if (parts.tm_gmtoff > 0)
- offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
- return {pdf_object::STRING, buf + offset};
-}
-
-static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
- auto obj = pdf.get(node_n, node_generation);
- if (obj.type != pdf_object::DICT)
- return {pdf_object::NIL};
-
- // Out of convenience; these aren't filled normally
- obj.n = node_n;
- obj.generation = node_generation;
-
- auto type = obj.dict.find("Type");
- if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
- return {pdf_object::NIL};
- if (type->second.string == "Page")
- return obj;
- if (type->second.string != "Pages")
- return {pdf_object::NIL};
-
- // XXX technically speaking, this may be an indirect reference. The correct way to solve this
- // seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
- auto kids = obj.dict.find("Kids");
- if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
- || kids->second.array.empty()
- || kids->second.array.at(0).type != pdf_object::REFERENCE)
- return {pdf_object::NIL};
-
- // XXX nothing prevents us from recursing in an evil circular graph
- return pdf_get_first_page(pdf, kids->second.array.at(0).n, kids->second.array.at(0).generation);
-}
-
-// -------------------------------------------------------------------------------------------------
-
-static std::string pkcs12_path, pkcs12_pass;
-
-// /All/ bytes are checked, except for the signature hexstring itself
-static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len) {
- size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
- if (pkcs12_path.empty())
- return "undefined path to the signing key";
-
- auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
- if (!pkcs12_fp)
- return pkcs12_path + ": " + strerror(errno);
-
- // Abandon hope, all ye who enter OpenSSL! Half of it is undocumented.
- OpenSSL_add_all_algorithms();
- ERR_load_crypto_strings();
- ERR_clear_error();
-
- PKCS12* p12 = nullptr;
- EVP_PKEY* private_key = nullptr;
- X509* certificate = nullptr;
- STACK_OF(X509)* chain = nullptr;
- PKCS7* p7 = nullptr;
- int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
- BIO* p7bio = nullptr;
- unsigned char* buf = nullptr;
-
- // OpenSSL error reasons will usually be of more value than any distinction I can come up with
- std::string err = "OpenSSL failure";
-
- if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
- || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
- err = pkcs12_path + ": parse failure";
- goto error;
- }
- if (!private_key || !certificate) {
- err = pkcs12_path + ": must contain a private key and a valid certificate chain";
- goto error;
- }
- // Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
- if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
- err = "the certificate's key usage must include digital signatures or non-repudiation";
- goto error;
- }
- if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
- err = "the certificate's extended key usage must include S/MIME";
- goto error;
- }
-#if 0 // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
- if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
- err = "the certificate can't be used for S/MIME digital signatures";
- goto error;
- }
-#endif
-
- // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
- if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
- || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
- goto error;
- // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
- // PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
- for (int i = 0; i < sk_X509_num(chain); i++)
- if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
- goto error;
-
- // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
- // a copy of the whole document. Hopefully this writes directly into a digest BIO.
- if (!(p7bio = PKCS7_dataInit(p7, nullptr))
- || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
- || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
- || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
- goto error;
-
-#if 0
- {
- // Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
- // Context: https://stackoverflow.com/a/29253469
- auto fp = fopen("pdf_signature.pem", "wb");
- assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
- }
-#endif
-
- if ((len = i2d_PKCS7(p7, &buf)) < 0)
- goto error;
- if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
- // The obvious solution is to increase the allocation... or spend a week reading specifications
- // while losing all faith in humanity as a species, and skip the PKCS7 API entirely
- err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
- sign_len - 2, size_t(len) * 2);
- goto error;
- }
- for (int i = 0; i < len; i++) {
- document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
- document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
- }
- err.clear();
-
-error:
- OPENSSL_free(buf);
- BIO_free_all(p7bio);
- PKCS7_free(p7);
- sk_X509_pop_free(chain, X509_free);
- X509_free(certificate);
- EVP_PKEY_free(private_key);
- PKCS12_free(p12);
-
- // In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
- while (auto code = ERR_get_error())
- if (auto reason = ERR_reason_error_string(code))
- err = err + "; " + reason;
-
- fclose(pkcs12_fp);
- return err;
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// The presumption here is that the document is valid and that it doesn't employ cross-reference
-/// streams from PDF 1.5, or at least constitutes a hybrid-reference file. The results with
-/// PDF 2.0 (2017) are currently unknown as the standard costs money.
-///
-/// Carelessly assumes that the version of the original document is at most PDF 1.6.
-///
-/// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
-/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
-/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
-static std::string pdf_sign(std::string& document) {
- pdf_updater pdf(document);
- auto err = pdf.initialize();
- if (!err.empty())
- return err;
-
- auto root_ref = pdf.trailer.find("Root");
- if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
- return "trailer does not contain a reference to Root";
- auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
- if (root.type != pdf_object::DICT)
- return "invalid Root dictionary reference";
-
- // 8.7 Digital Signatures - /signature dictionary/
- auto sigdict_n = pdf.allocate();
- size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
- pdf.update(sigdict_n, [&]{
- // The timestamp is important for Adobe Acrobat Reader DC. The ideal would be to use RFC 3161.
- pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
- " /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
- byterange_off = pdf.document.size();
- pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
- pdf.document.append("\n /Contents <");
- sign_off = pdf.document.size();
- pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
- pdf.document.append("> >>");
-
- // We actually need to exclude the hexstring quotes from signing
- sign_off -= 1;
- sign_len += 2;
- });
-
- // 8.6.3 Field Types - Signature Fields
- pdf_object sigfield{pdf_object::DICT};
- sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
- sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
- // 8.4.5 Annotations Types - Widget Annotations
- // We can merge the Signature Annotation and omit Kids here
- sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
- sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
- sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
- sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
- {pdf_object::NUMERIC, 0},
- {pdf_object::NUMERIC, 0},
- {pdf_object::NUMERIC, 0},
- {pdf_object::NUMERIC, 0},
- }}});
-
- auto sigfield_n = pdf.allocate();
- pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
-
- auto pages_ref = root.dict.find("Pages");
- if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
- return "invalid Pages reference";
- auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
- if (page.type != pdf_object::DICT)
- return "invalid or unsupported page tree";
-
- // XXX assuming this won't be an indirectly referenced array
- auto& annots = page.dict["Annots"];
- if (annots.type != pdf_object::ARRAY)
- annots = {pdf_object::ARRAY};
- annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
- pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
-
- // 8.6.1 Interactive Form Dictionary
- // XXX assuming there are no forms already, overwriting everything
- root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
- {"Fields", {std::vector<pdf_object>{
- {pdf_object::REFERENCE, sigfield_n, 0}
- }}},
- {"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
- }};
-
- // Upgrade the document version for SHA-256 etc.
- // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
- // version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
- root.dict["Version"] = {pdf_object::NAME, "1.6"};
- pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
- pdf.flush_updates();
-
- // Now that we know the length of everything, store byte ranges of what we're about to sign,
- // which must be everything but the resulting signature itself
- size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
- auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
- if (ranges.length() > byterange_len)
- return "not enough space reserved for /ByteRange";
- pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
- return pdf_fill_in_signature(pdf.document, sign_off, sign_len);
-}
-// -------------------------------------------------------------------------------------------------
+#include "pdf-simple-sign.h"
__attribute__((format(printf, 2, 3)))
static void die(int status, const char* format, ...) {
va_list ap;
va_start(ap, format);
if (isatty(fileno(stderr)))
- vfprintf(stderr, ssprintf("\x1b[31m%s\x1b[0m\n", format).c_str(), ap);
+ vfprintf(stderr, ("\x1b[31m" + std::string(format) + "\x1b[0m\n").c_str(), ap);
else
vfprintf(stderr, format, ap);
va_end(ap);
@@ -949,8 +72,6 @@ int main(int argc, char* argv[]) {
const char* input_path = argv[0];
const char* output_path = argv[1];
- pkcs12_path = argv[2];
- pkcs12_pass = argv[3];
std::string pdf_document;
if (auto fp = fopen(input_path, "rb")) {
@@ -964,7 +85,7 @@ int main(int argc, char* argv[]) {
die(1, "%s: %s", input_path, strerror(errno));
}
- auto err = pdf_sign(pdf_document);
+ auto err = pdf_simple_sign(pdf_document, argv[2], argv[3]);
if (!err.empty()) {
die(2, "Error: %s", err.c_str());
}
diff --git a/pdf-simple-sign.h b/pdf-simple-sign.h
new file mode 100644
index 0000000..022731c
--- /dev/null
+++ b/pdf-simple-sign.h
@@ -0,0 +1,28 @@
+// vim: set sw=2 ts=2 sts=2 et tw=100:
+//
+// pdf-simple-sign: simple PDF signer
+//
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#pragma once
+
+#include <string>
+
+/// Sign basic PDF documents, as generated by e.g. Cairo, using the key-certificate pair
+/// stored in the PKCS#12 file named `pkcs12_path`, with password `pkcs12_pass`.
+/// Returns a non-empty error string on failure.
+std::string pdf_simple_sign(std::string& document,
+ const std::string& pkcs12_path,
+ const std::string& pkcs12_pass);
diff --git a/pdf.cpp b/pdf.cpp
new file mode 100644
index 0000000..867c3d4
--- /dev/null
+++ b/pdf.cpp
@@ -0,0 +1,906 @@
+// vim: set sw=2 ts=2 sts=2 et tw=100:
+//
+// pdf-simple-sign: simple PDF signer
+//
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include <cmath>
+#undef NDEBUG
+#include <cassert>
+
+#include <vector>
+#include <map>
+#include <regex>
+#include <memory>
+#include <set>
+
+#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
+#error Need libstdc++ >= 4.9 for <regex>
+#endif
+
+#include <openssl/err.h>
+#include <openssl/x509v3.h>
+#include <openssl/pkcs12.h>
+
+#include "pdf-simple-sign.h"
+
+namespace {
+
+using uint = unsigned int;
+
+static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
+ std::string res;
+ if (v.empty())
+ return res;
+ for (const auto& s : v)
+ res += s + delim;
+ return res.substr(0, res.length() - delim.length());
+}
+
+template<typename... Args>
+static std::string ssprintf(const std::string& format, Args... args) {
+ size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
+ std::unique_ptr<char[]> buf(new char[size]);
+ std::snprintf(buf.get(), size, format.c_str(), args...);
+ return std::string(buf.get(), buf.get() + size - 1);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+/// PDF token/object thingy. Objects may be composed either from one or a sequence of tokens.
+/// The PDF Reference doesn't actually speak of tokens.
+struct pdf_object {
+ enum type {
+ END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
+ // Simple tokens
+ B_ARRAY, E_ARRAY, B_DICT, E_DICT,
+ // Higher-level objects
+ ARRAY, DICT, OBJECT, REFERENCE,
+ } type = END;
+
+ std::string string; ///< END (error message), COMMENT/KEYWORD/NAME/STRING
+ double number = 0.; ///< BOOL, NUMERIC
+ std::vector<pdf_object> array; ///< ARRAY, OBJECT
+ std::map<std::string, pdf_object> dict; ///< DICT, in the future also STREAM
+ uint n = 0, generation = 0; ///< OBJECT, REFERENCE
+
+ pdf_object(enum type type = END) : type(type) {}
+ pdf_object(enum type type, double v) : type(type), number(v) {}
+ pdf_object(enum type type, const std::string& v) : type(type), string(v) {}
+ pdf_object(enum type type, uint n, uint g) : type(type), n(n), generation(g) {}
+ pdf_object(const std::vector<pdf_object>& array) : type(ARRAY), array(array) {}
+ pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}
+
+ pdf_object(const pdf_object&) = default;
+ pdf_object(pdf_object&&) = default;
+ pdf_object& operator=(const pdf_object&) = default;
+ pdf_object& operator=(pdf_object&&) = default;
+
+ /// Return whether this is a number without a fractional part
+ bool is_integer() const {
+ double tmp;
+ return type == NUMERIC && std::modf(number, &tmp) == 0.;
+ }
+};
+
+/// Basic lexical analyser for the Portable Document Format, giving limited error information
+struct pdf_lexer {
+ const unsigned char* p;
+ pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
+
+ static constexpr const char* oct_alphabet = "01234567";
+ static constexpr const char* dec_alphabet = "0123456789";
+ static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
+ static constexpr const char* whitespace = "\t\n\f\r ";
+ static constexpr const char* delimiters = "()<>[]{}/%";
+
+ bool eat_newline(int ch) {
+ if (ch == '\r') {
+ if (*p == '\n') p++;
+ return true;
+ }
+ return ch == '\n';
+ }
+
+ pdf_object string() {
+ std::string value;
+ int parens = 1;
+ while (1) {
+ if (!*p) return {pdf_object::END, "unexpected end of string"};
+ auto ch = *p++;
+ if (eat_newline(ch)) ch = '\n';
+ else if (ch == '(') { parens++; }
+ else if (ch == ')') { if (!--parens) break; }
+ else if (ch == '\\') {
+ if (!*p) return {pdf_object::END, "unexpected end of string"};
+ switch ((ch = *p++)) {
+ case 'n': ch = '\n'; break;
+ case 'r': ch = '\r'; break;
+ case 't': ch = '\t'; break;
+ case 'b': ch = '\b'; break;
+ case 'f': ch = '\f'; break;
+ default:
+ if (eat_newline(ch))
+ continue;
+ std::string octal;
+ if (ch && strchr(oct_alphabet, ch)) {
+ octal += ch;
+ if (*p && strchr(oct_alphabet, *p)) octal += *p++;
+ if (*p && strchr(oct_alphabet, *p)) octal += *p++;
+ ch = std::stoi(octal, nullptr, 8);
+ }
+ }
+ }
+ value += ch;
+ }
+ return {pdf_object::STRING, value};
+ }
+
+ pdf_object string_hex() {
+ std::string value, buf;
+ while (*p != '>') {
+ if (!*p) return {pdf_object::END, "unexpected end of hex string"};
+ if (!strchr(hex_alphabet, *p))
+ return {pdf_object::END, "invalid hex string"};
+ buf += *p++;
+ if (buf.size() == 2) {
+ value += char(std::stoi(buf, nullptr, 16));
+ buf.clear();
+ }
+ }
+ p++;
+ if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
+ return {pdf_object::STRING, value};
+ }
+
+ pdf_object name() {
+ std::string value;
+ while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
+ auto ch = *p++;
+ if (ch == '#') {
+ std::string hexa;
+ if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
+ if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
+ if (hexa.size() != 2)
+ return {pdf_object::END, "invalid name hexa escape"};
+ ch = char(std::stoi(hexa, nullptr, 16));
+ }
+ value += ch;
+ }
+ if (value.empty()) return {pdf_object::END, "unexpected end of name"};
+ return {pdf_object::NAME, value};
+ }
+
+ pdf_object comment() {
+ std::string value;
+ while (*p && *p != '\r' && *p != '\n')
+ value += *p++;
+ return {pdf_object::COMMENT, value};
+ }
+
+ // XXX maybe invalid numbers should rather be interpreted as keywords
+ pdf_object number() {
+ std::string value;
+ if (*p == '-')
+ value += *p++;
+ bool real = false, digits = false;
+ while (*p) {
+ if (strchr(dec_alphabet, *p))
+ digits = true;
+ else if (*p == '.' && !real)
+ real = true;
+ else
+ break;
+ value += *p++;
+ }
+ if (!digits) return {pdf_object::END, "invalid number"};
+ return {pdf_object::NUMERIC, std::stod(value, nullptr)};
+ }
+
+ pdf_object next() {
+ if (!*p)
+ return {pdf_object::END};
+ if (strchr("-0123456789.", *p))
+ return number();
+
+ // {} end up being keywords, we might want to error out on those
+ std::string value;
+ while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
+ value += *p++;
+ if (!value.empty()) {
+ if (value == "null") return {pdf_object::NIL};
+ if (value == "true") return {pdf_object::BOOL, 1};
+ if (value == "false") return {pdf_object::BOOL, 0};
+ return {pdf_object::KEYWORD, value};
+ }
+
+ switch (char ch = *p++) {
+ case '/': return name();
+ case '%': return comment();
+ case '(': return string();
+ case '[': return {pdf_object::B_ARRAY};
+ case ']': return {pdf_object::E_ARRAY};
+ case '<':
+ if (*p++ == '<')
+ return {pdf_object::B_DICT};
+ p--;
+ return string_hex();
+ case '>':
+ if (*p++ == '>')
+ return {pdf_object::E_DICT};
+ p--;
+ return {pdf_object::END, "unexpected '>'"};
+ default:
+ if (eat_newline(ch))
+ return {pdf_object::NL};
+ if (strchr(whitespace, ch))
+ return next();
+ return {pdf_object::END, "unexpected input"};
+ }
+ }
+};
+
+// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
+static std::string pdf_serialize(const pdf_object& o) {
+ switch (o.type) {
+ case pdf_object::NL: return "\n";
+ case pdf_object::NIL: return "null";
+ case pdf_object::BOOL: return o.number ? "true" : "false";
+ case pdf_object::NUMERIC:
+ {
+ if (o.is_integer()) return std::to_string((long long) o.number);
+ return std::to_string(o.number);
+ }
+ case pdf_object::KEYWORD: return o.string;
+ case pdf_object::NAME:
+ {
+ std::string escaped = "/";
+ for (char c : o.string) {
+ if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
+ escaped += ssprintf("#%02x", c);
+ else
+ escaped += c;
+ }
+ return escaped;
+ }
+ case pdf_object::STRING:
+ {
+ std::string escaped;
+ for (char c : o.string) {
+ if (c == '\\' || c == '(' || c == ')')
+ escaped += '\\';
+ escaped += c;
+ }
+ return "(" + escaped + ")";
+ }
+ case pdf_object::B_ARRAY: return "[";
+ case pdf_object::E_ARRAY: return "]";
+ case pdf_object::B_DICT: return "<<";
+ case pdf_object::E_DICT: return ">>";
+ case pdf_object::ARRAY:
+ {
+ std::vector<std::string> v;
+ for (const auto& i : o.array)
+ v.push_back(pdf_serialize(i));
+ return "[ " + concatenate(v, " ") + " ]";
+ }
+ case pdf_object::DICT:
+ {
+ std::string s;
+ for (const auto& i : o.dict)
+ // FIXME the key is also supposed to be escaped by pdf_serialize()
+ s += " /" + i.first + " " + pdf_serialize(i.second);
+ return "<<" + s + " >>";
+ }
+ case pdf_object::OBJECT:
+ return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
+ case pdf_object::REFERENCE:
+ return ssprintf("%u %u R", o.n, o.generation);
+ default:
+ assert(!"unsupported token for serialization");
+ }
+}
+
+// -------------------------------------------------------------------------------------------------
+
+/// Utility class to help read and possibly incrementally update PDF files
+class pdf_updater {
+ struct ref {
+ size_t offset = 0; ///< File offset or N of the next free entry
+ uint generation = 0; ///< Object generation
+ bool free = true; ///< Whether this N has been deleted
+ };
+
+ std::vector<ref> xref; ///< Cross-reference table
+ size_t xref_size = 0; ///< Current cross-reference table size, correlated to xref.size()
+ std::set<uint> updated; ///< List of updated objects
+
+ pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
+ pdf_object parse_R(std::vector<pdf_object>& stack) const;
+ pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
+ std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
+
+public:
+ /// The new trailer dictionary to be written, initialized with the old one
+ std::map<std::string, pdf_object> trailer;
+
+ std::string& document;
+ pdf_updater(std::string& document) : document(document) {}
+
+ /// Build the cross-reference table and prepare a new trailer dictionary
+ std::string initialize();
+ /// Retrieve an object by its number and generation -- may return NIL or END with an error
+ pdf_object get(uint n, uint generation) const;
+ /// Allocate a new object number
+ uint allocate();
+ /// Append an updated object to the end of the document
+ void update(uint n, std::function<void()> fill);
+ /// Write an updated cross-reference table and trailer
+ void flush_updates();
+};
+
+// -------------------------------------------------------------------------------------------------
+
+/// If the object is an error, forward its message, otherwise return err.
+static std::string pdf_error(const pdf_object& o, const char* err) {
+ if (o.type != pdf_object::END || o.string.empty()) return err;
+ return o.string;
+}
+
+pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
+ if (stack.size() < 2)
+ return {pdf_object::END, "missing object ID pair"};
+
+ auto g = stack.back(); stack.pop_back();
+ auto n = stack.back(); stack.pop_back();
+ if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
+ || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
+ return {pdf_object::END, "invalid object ID pair"};
+
+ pdf_object obj{pdf_object::OBJECT};
+ obj.n = n.number;
+ obj.generation = g.number;
+
+ while (1) {
+ auto object = parse(lex, obj.array);
+ if (object.type == pdf_object::END)
+ return {pdf_object::END, pdf_error(object, "object doesn't end")};
+ if (object.type == pdf_object::KEYWORD && object.string == "endobj")
+ break;
+ obj.array.push_back(std::move(object));
+ }
+ return obj;
+}
+
+pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
+ if (stack.size() < 2)
+ return {pdf_object::END, "missing reference ID pair"};
+
+ auto g = stack.back(); stack.pop_back();
+ auto n = stack.back(); stack.pop_back();
+ if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
+ || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
+ return {pdf_object::END, "invalid reference ID pair"};
+
+ pdf_object ref{pdf_object::REFERENCE};
+ ref.n = n.number;
+ ref.generation = g.number;
+ return ref;
+}
+
+/// Read an object at the lexer's position. Not a strict parser.
+pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
+ auto token = lex.next();
+ switch (token.type) {
+ case pdf_object::NL:
+ case pdf_object::COMMENT:
+ // These are not important to parsing, not even for this procedure's needs
+ return parse(lex, stack);
+ case pdf_object::B_ARRAY:
+ {
+ std::vector<pdf_object> array;
+ while (1) {
+ auto object = parse(lex, array);
+ if (object.type == pdf_object::END)
+ return {pdf_object::END, pdf_error(object, "array doesn't end")};
+ if (object.type == pdf_object::E_ARRAY)
+ break;
+ array.push_back(std::move(object));
+ }
+ return array;
+ }
+ case pdf_object::B_DICT:
+ {
+ std::vector<pdf_object> array;
+ while (1) {
+ auto object = parse(lex, array);
+ if (object.type == pdf_object::END)
+ return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
+ if (object.type == pdf_object::E_DICT)
+ break;
+ array.push_back(std::move(object));
+ }
+ if (array.size() % 2)
+ return {pdf_object::END, "unbalanced dictionary"};
+ std::map<std::string, pdf_object> dict;
+ for (size_t i = 0; i < array.size(); i += 2) {
+ if (array[i].type != pdf_object::NAME)
+ return {pdf_object::END, "invalid dictionary key type"};
+ dict.insert({array[i].string, std::move(array[i + 1])});
+ }
+ return dict;
+ }
+ case pdf_object::KEYWORD:
+ // Appears in the document body, typically needs to access the cross-reference table
+ // TODO use the xref to read /Length etc. once we actually need to read such objects;
+ // presumably streams can use the pdf_object::string member
+ if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
+ if (token.string == "obj") return parse_obj(lex, stack);
+ if (token.string == "R") return parse_R(stack);
+ return token;
+ default:
+ return token;
+ }
+}
+
+std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
+ std::vector<pdf_object> throwaway_stack;
+ {
+ auto keyword = parse(lex, throwaway_stack);
+ if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
+ return "invalid xref table";
+ }
+ while (1) {
+ auto object = parse(lex, throwaway_stack);
+ if (object.type == pdf_object::END)
+ return pdf_error(object, "unexpected EOF while looking for the trailer");
+ if (object.type == pdf_object::KEYWORD && object.string == "trailer")
+ break;
+
+ auto second = parse(lex, throwaway_stack);
+ if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
+ || !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
+ return "invalid xref section header";
+
+ const size_t start = object.number;
+ const size_t count = second.number;
+ for (size_t i = 0; i < count; i++) {
+ auto off = parse(lex, throwaway_stack);
+ auto gen = parse(lex, throwaway_stack);
+ auto key = parse(lex, throwaway_stack);
+ if (!off.is_integer() || off.number < 0 || off.number > document.length()
+ || !gen.is_integer() || gen.number < 0 || gen.number > 65535
+ || key.type != pdf_object::KEYWORD)
+ return "invalid xref entry";
+
+ bool free = true;
+ if (key.string == "n")
+ free = false;
+ else if (key.string != "f")
+ return "invalid xref entry";
+
+ auto n = start + i;
+ if (loaded_entries.count(n))
+ continue;
+ if (n >= xref.size())
+ xref.resize(n + 1);
+ loaded_entries.insert(n);
+
+ auto& ref = xref[n];
+ ref.generation = gen.number;
+ ref.offset = off.number;
+ ref.free = free;
+ }
+ }
+ return "";
+}
+
+// -------------------------------------------------------------------------------------------------
+
+std::string pdf_updater::initialize() {
+ // We only need to look for startxref roughly within the last kibibyte of the document
+ static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
+ std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
+
+ std::smatch m;
+ if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
+ return "cannot find startxref";
+
+ size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
+ std::set<size_t> loaded_xrefs;
+ std::set<uint> loaded_entries;
+
+ std::vector<pdf_object> throwaway_stack;
+ while (1) {
+ if (loaded_xrefs.count(xref_offset))
+ return "circular xref offsets";
+ if (xref_offset >= document.length())
+ return "invalid xref offset";
+
+ pdf_lexer lex(document.c_str() + xref_offset);
+ auto err = load_xref(lex, loaded_entries);
+ if (!err.empty()) return err;
+
+ auto trailer = parse(lex, throwaway_stack);
+ if (trailer.type != pdf_object::DICT)
+ return pdf_error(trailer, "invalid trailer dictionary");
+ if (loaded_xrefs.empty())
+ this->trailer = trailer.dict;
+ loaded_xrefs.insert(xref_offset);
+
+ const auto prev_offset = trailer.dict.find("Prev");
+ if (prev_offset == trailer.dict.end())
+ break;
+ // FIXME we don't check for size_t over or underflow
+ if (!prev_offset->second.is_integer())
+ return "invalid Prev offset";
+ xref_offset = prev_offset->second.number;
+ }
+
+ trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
+ const auto last_size = trailer.find("Size");
+ if (last_size == trailer.end() || !last_size->second.is_integer() ||
+ last_size->second.number <= 0)
+ return "invalid or missing cross-reference table Size";
+
+ xref_size = last_size->second.number;
+ return "";
+}
+
+pdf_object pdf_updater::get(uint n, uint generation) const {
+ if (n >= xref_size)
+ return {pdf_object::NIL};
+
+ const auto& ref = xref[n];
+ if (ref.free || ref.generation != generation || ref.offset >= document.length())
+ return {pdf_object::NIL};
+
+ pdf_lexer lex(document.c_str() + ref.offset);
+ std::vector<pdf_object> stack;
+ while (1) {
+ auto object = parse(lex, stack);
+ if (object.type == pdf_object::END)
+ return object;
+ if (object.type != pdf_object::OBJECT)
+ stack.push_back(std::move(object));
+ else if (object.n != n || object.generation != generation)
+ return {pdf_object::END, "object mismatch"};
+ else
+ return std::move(object.array.at(0));
+ }
+}
+
+uint pdf_updater::allocate() {
+ assert(xref_size < UINT_MAX);
+
+ auto n = xref_size++;
+ if (xref.size() < xref_size)
+ xref.resize(xref_size);
+
+ // We don't make sure it gets a subsection in the update yet because we
+ // make no attempts at fixing the linked list of free items either
+ return n;
+}
+
+void pdf_updater::update(uint n, std::function<void()> fill) {
+ auto& ref = xref.at(n);
+ ref.offset = document.length() + 1;
+ ref.free = false;
+ updated.insert(n);
+
+ document += ssprintf("\n%u %u obj\n", n, ref.generation);
+ // Separately so that the callback can use document.length() to get the current offset
+ fill();
+ document += "\nendobj";
+}
+
+void pdf_updater::flush_updates() {
+ std::map<uint, size_t> groups;
+ for (auto i = updated.cbegin(); i != updated.cend(); ) {
+ size_t start = *i, count = 1;
+ while (++i != updated.cend() && *i == start + count)
+ count++;
+ groups[start] = count;
+ }
+
+ // Taking literally "Each cross-reference section begins with a line containing the keyword xref.
+ // Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
+ if (groups.empty())
+ groups[0] = 0;
+
+ auto startxref = document.length() + 1;
+ document += "\nxref\n";
+ for (const auto& g : groups) {
+ document += ssprintf("%u %zu\n", g.first, g.second);
+ for (size_t i = 0; i < g.second; i++) {
+ auto& ref = xref[g.first + i];
+ document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
+ }
+ }
+
+ trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
+ document += "trailer\n" + pdf_serialize(trailer)
+ + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+/// Make a PDF object representing the given point in time
+static pdf_object pdf_date(time_t timestamp) {
+ struct tm parts;
+ assert(localtime_r(&timestamp, &parts));
+
+ char buf[64];
+ assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
+
+ std::string offset = "Z";
+ auto offset_min = parts.tm_gmtoff / 60;
+ if (parts.tm_gmtoff < 0)
+ offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
+ if (parts.tm_gmtoff > 0)
+ offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
+ return {pdf_object::STRING, buf + offset};
+}
+
+static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
+ auto obj = pdf.get(node_n, node_generation);
+ if (obj.type != pdf_object::DICT)
+ return {pdf_object::NIL};
+
+ // Out of convenience; these aren't filled normally
+ obj.n = node_n;
+ obj.generation = node_generation;
+
+ auto type = obj.dict.find("Type");
+ if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
+ return {pdf_object::NIL};
+ if (type->second.string == "Page")
+ return obj;
+ if (type->second.string != "Pages")
+ return {pdf_object::NIL};
+
+ // XXX technically speaking, this may be an indirect reference. The correct way to solve this
+ // seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
+ auto kids = obj.dict.find("Kids");
+ if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
+ || kids->second.array.empty()
+ || kids->second.array.at(0).type != pdf_object::REFERENCE)
+ return {pdf_object::NIL};
+
+ // XXX nothing prevents us from recursing in an evil circular graph
+ return pdf_get_first_page(pdf, kids->second.array.at(0).n, kids->second.array.at(0).generation);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// /All/ bytes are checked, except for the signature hexstring itself
+static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len,
+ const std::string& pkcs12_path,
+ const std::string& pkcs12_pass) {
+ size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
+ if (pkcs12_path.empty())
+ return "undefined path to the signing key";
+
+ auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
+ if (!pkcs12_fp)
+ return pkcs12_path + ": " + strerror(errno);
+
+ // Abandon hope, all ye who enter OpenSSL! Half of it is undocumented.
+ OpenSSL_add_all_algorithms();
+ ERR_load_crypto_strings();
+ ERR_clear_error();
+
+ PKCS12* p12 = nullptr;
+ EVP_PKEY* private_key = nullptr;
+ X509* certificate = nullptr;
+ STACK_OF(X509)* chain = nullptr;
+ PKCS7* p7 = nullptr;
+ int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
+ BIO* p7bio = nullptr;
+ unsigned char* buf = nullptr;
+
+ // OpenSSL error reasons will usually be of more value than any distinction I can come up with
+ std::string err = "OpenSSL failure";
+
+ if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
+ || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
+ err = pkcs12_path + ": parse failure";
+ goto error;
+ }
+ if (!private_key || !certificate) {
+ err = pkcs12_path + ": must contain a private key and a valid certificate chain";
+ goto error;
+ }
+ // Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
+ if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
+ err = "the certificate's key usage must include digital signatures or non-repudiation";
+ goto error;
+ }
+ if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
+ err = "the certificate's extended key usage must include S/MIME";
+ goto error;
+ }
+#if 0 // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
+ if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
+ err = "the certificate can't be used for S/MIME digital signatures";
+ goto error;
+ }
+#endif
+
+ // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
+ if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
+ || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
+ goto error;
+ // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
+ // PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
+ for (int i = 0; i < sk_X509_num(chain); i++)
+ if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
+ goto error;
+
+ // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
+ // a copy of the whole document. Hopefully this writes directly into a digest BIO.
+ if (!(p7bio = PKCS7_dataInit(p7, nullptr))
+ || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
+ || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
+ || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
+ goto error;
+
+#if 0
+ {
+ // Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
+ // Context: https://stackoverflow.com/a/29253469
+ auto fp = fopen("pdf_signature.pem", "wb");
+ assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
+ }
+#endif
+
+ if ((len = i2d_PKCS7(p7, &buf)) < 0)
+ goto error;
+ if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
+ // The obvious solution is to increase the allocation... or spend a week reading specifications
+ // while losing all faith in humanity as a species, and skip the PKCS7 API entirely
+ err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
+ sign_len - 2, size_t(len) * 2);
+ goto error;
+ }
+ for (int i = 0; i < len; i++) {
+ document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
+ document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
+ }
+ err.clear();
+
+error:
+ OPENSSL_free(buf);
+ BIO_free_all(p7bio);
+ PKCS7_free(p7);
+ sk_X509_pop_free(chain, X509_free);
+ X509_free(certificate);
+ EVP_PKEY_free(private_key);
+ PKCS12_free(p12);
+
+ // In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
+ while (auto code = ERR_get_error())
+ if (auto reason = ERR_reason_error_string(code))
+ err = err + "; " + reason;
+
+ fclose(pkcs12_fp);
+ return err;
+}
+
+} // anonymous namespace
+
+// -------------------------------------------------------------------------------------------------
+
+// The presumption here is that the document is valid and that it doesn't employ cross-reference
+// streams from PDF 1.5, or at least constitutes a hybrid-reference file. The results with
+// PDF 2.0 (2017) are currently unknown as the standard costs money.
+//
+// Carelessly assumes that the version of the original document is at most PDF 1.6.
+//
+// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
+// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
+// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
+std::string pdf_simple_sign(std::string& document,
+ const std::string& pkcs12_path,
+ const std::string& pkcs12_pass) {
+ pdf_updater pdf(document);
+ auto err = pdf.initialize();
+ if (!err.empty())
+ return err;
+
+ auto root_ref = pdf.trailer.find("Root");
+ if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
+ return "trailer does not contain a reference to Root";
+ auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
+ if (root.type != pdf_object::DICT)
+ return "invalid Root dictionary reference";
+
+ // 8.7 Digital Signatures - /signature dictionary/
+ auto sigdict_n = pdf.allocate();
+ size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
+ pdf.update(sigdict_n, [&]{
+ // The timestamp is important for Adobe Acrobat Reader DC. The ideal would be to use RFC 3161.
+ pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
+ " /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
+ byterange_off = pdf.document.size();
+ pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
+ pdf.document.append("\n /Contents <");
+ sign_off = pdf.document.size();
+ pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
+ pdf.document.append("> >>");
+
+ // We actually need to exclude the hexstring quotes from signing
+ sign_off -= 1;
+ sign_len += 2;
+ });
+
+ // 8.6.3 Field Types - Signature Fields
+ pdf_object sigfield{pdf_object::DICT};
+ sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
+ sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
+ // 8.4.5 Annotations Types - Widget Annotations
+ // We can merge the Signature Annotation and omit Kids here
+ sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
+ sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
+ sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
+ sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
+ {pdf_object::NUMERIC, 0},
+ {pdf_object::NUMERIC, 0},
+ {pdf_object::NUMERIC, 0},
+ {pdf_object::NUMERIC, 0},
+ }}});
+
+ auto sigfield_n = pdf.allocate();
+ pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
+
+ auto pages_ref = root.dict.find("Pages");
+ if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
+ return "invalid Pages reference";
+ auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
+ if (page.type != pdf_object::DICT)
+ return "invalid or unsupported page tree";
+
+ // XXX assuming this won't be an indirectly referenced array
+ auto& annots = page.dict["Annots"];
+ if (annots.type != pdf_object::ARRAY)
+ annots = {pdf_object::ARRAY};
+ annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
+ pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
+
+ // 8.6.1 Interactive Form Dictionary
+ // XXX assuming there are no forms already, overwriting everything
+ root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
+ {"Fields", {std::vector<pdf_object>{
+ {pdf_object::REFERENCE, sigfield_n, 0}
+ }}},
+ {"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
+ }};
+
+ // Upgrade the document version for SHA-256 etc.
+ // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
+ // version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
+ root.dict["Version"] = {pdf_object::NAME, "1.6"};
+ pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
+ pdf.flush_updates();
+
+ // Now that we know the length of everything, store byte ranges of what we're about to sign,
+ // which must be everything but the resulting signature itself
+ size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
+ auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
+ if (ranges.length() > byterange_len)
+ return "not enough space reserved for /ByteRange";
+ pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
+ return pdf_fill_in_signature(pdf.document, sign_off, sign_len, pkcs12_path, pkcs12_pass);
+}