From ceea7dca2f4ff356e136b9a72611b9b66d402eaa Mon Sep 17 00:00:00 2001
From: Přemysl Eric Janouch <p@janouch.name>
Date: Wed, 12 Aug 2020 07:29:57 +0200
Subject: C++: split out a library with a trivial interface

Closes #2.
---
 meson.build         |   9 +-
 pdf-simple-sign.cpp | 895 +--------------------------------------------------
 pdf-simple-sign.h   |  28 ++
 pdf.cpp             | 906 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 950 insertions(+), 888 deletions(-)
 create mode 100644 pdf-simple-sign.h
 create mode 100644 pdf.cpp
diff --git a/meson.build b/meson.build
index 0446cc3..6544cdb 100644
--- a/meson.build
+++ b/meson.build
@@ -1,5 +1,12 @@
 project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'])
 cryptodep = dependency('libcrypto')
-executable('pdf-simple-sign', 'pdf-simple-sign.cpp',
+
+executable('pdf-simple-sign', 'pdf-simple-sign.cpp', 'pdf.cpp',
+	install : true,
+	dependencies : cryptodep)
+
+install_headers('pdf-simple-sign.h')
+library('pdf-simple-sign', 'pdf.cpp',
+	soversion : 0,
 	install : true,
 	dependencies : cryptodep)
diff --git a/pdf-simple-sign.cpp b/pdf-simple-sign.cpp
index 818270e..8f3fe3f 100644
--- a/pdf-simple-sign.cpp
+++ b/pdf-simple-sign.cpp
@@ -2,7 +2,7 @@
 //
 // pdf-simple-sign: simple PDF signer
 //
-// Copyright (c) 2017, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -17,899 +17,22 @@
 //
 
 #include <cstdio>
-#include <cmath>
-#undef NDEBUG
-#include <cassert>
-
-#include <vector>
-#include <map>
-#include <regex>
-#include <memory>
-#include <set>
-
-#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
-#error Need libstdc++ >= 4.9 for <regex>
-#endif
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+#include <cstdarg>
 
 #include <unistd.h>
 #include <getopt.h>
-#include <openssl/err.h>
-#include <openssl/x509v3.h>
-#include <openssl/pkcs12.h>
-
-// -------------------------------------------------------------------------------------------------
-
-using uint = unsigned int;
-
-static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
-  std::string res;
-  if (v.empty())
-    return res;
-  for (const auto& s : v)
-    res += s + delim;
-  return res.substr(0, res.length() - delim.length());
-}
-
-template<typename... Args>
-std::string ssprintf(const std::string& format, Args... args) {
-  size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
-  std::unique_ptr<char[]> buf(new char[size]);
-  std::snprintf(buf.get(), size, format.c_str(), args...);
-  return std::string(buf.get(), buf.get() + size - 1);
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// PDF token/object thingy.  Objects may be composed either from one or a sequence of tokens.
-/// The PDF Reference doesn't actually speak of tokens.
-struct pdf_object {
-  enum type {
-    END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
-    // Simple tokens
-    B_ARRAY, E_ARRAY, B_DICT, E_DICT,
-    // Higher-level objects
-    ARRAY, DICT, OBJECT, REFERENCE,
-  } type = END;
-
-  std::string string;                      ///< END (error message), COMMENT/KEYWORD/NAME/STRING
-  double number = 0.;                      ///< BOOL, NUMERIC
-  std::vector<pdf_object> array;           ///< ARRAY, OBJECT
-  std::map<std::string, pdf_object> dict;  ///< DICT, in the future also STREAM
-  uint n = 0, generation = 0;              ///< OBJECT, REFERENCE
-
-  pdf_object(enum type type = END)                          : type(type) {}
-  pdf_object(enum type type, double v)                      : type(type), number(v) {}
-  pdf_object(enum type type, const std::string& v)          : type(type), string(v) {}
-  pdf_object(enum type type, uint n, uint g)                : type(type), n(n), generation(g) {}
-  pdf_object(const std::vector<pdf_object>& array)          : type(ARRAY), array(array) {}
-  pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}
-
-  pdf_object(const pdf_object&)            = default;
-  pdf_object(pdf_object&&)                 = default;
-  pdf_object& operator=(const pdf_object&) = default;
-  pdf_object& operator=(pdf_object&&)      = default;
-
-  /// Return whether this is a number without a fractional part
-  bool is_integer() const {
-    double tmp;
-    return type == NUMERIC && std::modf(number, &tmp) == 0.;
-  }
-};
-
-/// Basic lexical analyser for the Portable Document Format, giving limited error information
-struct pdf_lexer {
-  const unsigned char* p;
-  pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
-
-  static constexpr const char* oct_alphabet = "01234567";
-  static constexpr const char* dec_alphabet = "0123456789";
-  static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
-  static constexpr const char* whitespace = "\t\n\f\r ";
-  static constexpr const char* delimiters = "()<>[]{}/%";
-
-  bool eat_newline(int ch) {
-    if (ch == '\r') {
-      if (*p == '\n') p++;
-      return true;
-    }
-    return ch == '\n';
-  }
-
-  pdf_object string() {
-    std::string value;
-    int parens = 1;
-    while (1) {
-      if (!*p) return {pdf_object::END, "unexpected end of string"};
-      auto ch = *p++;
-      if (eat_newline(ch)) ch = '\n';
-      else if (ch == '(') { parens++; }
-      else if (ch == ')') { if (!--parens) break; }
-      else if (ch == '\\') {
-        if (!*p) return {pdf_object::END, "unexpected end of string"};
-        switch ((ch = *p++)) {
-        case 'n': ch = '\n'; break;
-        case 'r': ch = '\r'; break;
-        case 't': ch = '\t'; break;
-        case 'b': ch = '\b'; break;
-        case 'f': ch = '\f'; break;
-        default:
-          if (eat_newline(ch))
-            continue;
-          std::string octal;
-          if (ch && strchr(oct_alphabet, ch)) {
-            octal += ch;
-            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
-            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
-            ch = std::stoi(octal, nullptr, 8);
-          }
-        }
-      }
-      value += ch;
-    }
-    return {pdf_object::STRING, value};
-  }
-
-  pdf_object string_hex() {
-    std::string value, buf;
-    while (*p != '>') {
-      if (!*p) return {pdf_object::END, "unexpected end of hex string"};
-      if (!strchr(hex_alphabet, *p))
-        return {pdf_object::END, "invalid hex string"};
-      buf += *p++;
-      if (buf.size() == 2) {
-        value += char(std::stoi(buf, nullptr, 16));
-        buf.clear();
-      }
-    }
-    p++;
-    if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
-    return {pdf_object::STRING, value};
-  }
-
-  pdf_object name() {
-    std::string value;
-    while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
-      auto ch = *p++;
-      if (ch == '#') {
-        std::string hexa;
-        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
-        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
-        if (hexa.size() != 2)
-          return {pdf_object::END, "invalid name hexa escape"};
-        ch = char(std::stoi(hexa, nullptr, 16));
-      }
-      value += ch;
-    }
-    if (value.empty()) return {pdf_object::END, "unexpected end of name"};
-    return {pdf_object::NAME, value};
-  }
-
-  pdf_object comment() {
-    std::string value;
-    while (*p && *p != '\r' && *p != '\n')
-      value += *p++;
-    return {pdf_object::COMMENT, value};
-  }
-
-  // XXX maybe invalid numbers should rather be interpreted as keywords
-  pdf_object number() {
-    std::string value;
-    if (*p == '-')
-      value += *p++;
-    bool real = false, digits = false;
-    while (*p) {
-      if (strchr(dec_alphabet, *p))
-        digits = true;
-      else if (*p == '.' && !real)
-        real = true;
-      else
-        break;
-      value += *p++;
-    }
-    if (!digits) return {pdf_object::END, "invalid number"};
-    return {pdf_object::NUMERIC, std::stod(value, nullptr)};
-  }
-
-  pdf_object next() {
-    if (!*p)
-      return {pdf_object::END};
-    if (strchr("-0123456789.", *p))
-      return number();
-
-    // {} end up being keywords, we might want to error out on those
-    std::string value;
-    while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
-      value += *p++;
-    if (!value.empty()) {
-      if (value == "null")  return {pdf_object::NIL};
-      if (value == "true")  return {pdf_object::BOOL, 1};
-      if (value == "false") return {pdf_object::BOOL, 0};
-      return {pdf_object::KEYWORD, value};
-    }
-
-    switch (char ch = *p++) {
-    case '/': return name();
-    case '%': return comment();
-    case '(': return string();
-    case '[': return {pdf_object::B_ARRAY};
-    case ']': return {pdf_object::E_ARRAY};
-    case '<':
-      if (*p++ == '<')
-        return {pdf_object::B_DICT};
-      p--;
-      return string_hex();
-    case '>':
-      if (*p++ == '>')
-        return {pdf_object::E_DICT};
-      p--;
-      return {pdf_object::END, "unexpected '>'"};
-    default:
-      if (eat_newline(ch))
-        return {pdf_object::NL};
-      if (strchr(whitespace, ch))
-        return next();
-      return {pdf_object::END, "unexpected input"};
-    }
-  }
-};
-
-// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
-static std::string pdf_serialize(const pdf_object& o) {
-  switch (o.type) {
-  case pdf_object::NL:      return "\n";
-  case pdf_object::NIL:     return "null";
-  case pdf_object::BOOL:    return o.number ? "true" : "false";
-  case pdf_object::NUMERIC:
-  {
-    if (o.is_integer()) return std::to_string((long long) o.number);
-    return std::to_string(o.number);
-  }
-  case pdf_object::KEYWORD: return o.string;
-  case pdf_object::NAME:
-  {
-    std::string escaped = "/";
-    for (char c : o.string) {
-      if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
-        escaped += ssprintf("#%02x", c);
-      else
-        escaped += c;
-    }
-    return escaped;
-  }
-  case pdf_object::STRING:
-  {
-    std::string escaped;
-    for (char c : o.string) {
-      if (c == '\\' || c == '(' || c == ')')
-        escaped += '\\';
-      escaped += c;
-    }
-    return "(" + escaped + ")";
-  }
-  case pdf_object::B_ARRAY: return "[";
-  case pdf_object::E_ARRAY: return "]";
-  case pdf_object::B_DICT:  return "<<";
-  case pdf_object::E_DICT:  return ">>";
-  case pdf_object::ARRAY:
-  {
-    std::vector<std::string> v;
-    for (const auto& i : o.array)
-      v.push_back(pdf_serialize(i));
-    return "[ " + concatenate(v, " ") + " ]";
-  }
-  case pdf_object::DICT:
-  {
-    std::string s;
-    for (const auto i : o.dict)
-      // FIXME the key is also supposed to be escaped by pdf_serialize()
-      s += " /" + i.first + " " + pdf_serialize(i.second);
-    return "<<" + s + " >>";
-  }
-  case pdf_object::OBJECT:
-    return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
-  case pdf_object::REFERENCE:
-    return ssprintf("%u %u R", o.n, o.generation);
-  default:
-    assert(!"unsupported token for serialization");
-  }
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// Utility class to help read and possibly incrementally update PDF files
-class pdf_updater {
-  struct ref {
-    size_t offset = 0;     ///< File offset or N of the next free entry
-    uint generation = 0;   ///< Object generation
-    bool free = true;      ///< Whether this N has been deleted
-  };
-
-  std::vector<ref> xref;   ///< Cross-reference table
-  size_t xref_size = 0;    ///< Current cross-reference table size, correlated to xref.size()
-  std::set<uint> updated;  ///< List of updated objects
-
-  pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
-  pdf_object parse_R(std::vector<pdf_object>& stack) const;
-  pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
-  std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
-
-public:
-  /// The new trailer dictionary to be written, initialized with the old one
-  std::map<std::string, pdf_object> trailer;
-
-  std::string& document;
-  pdf_updater(std::string& document) : document(document) {}
-
-  /// Build the cross-reference table and prepare a new trailer dictionary
-  std::string initialize();
-  /// Retrieve an object by its number and generation -- may return NIL or END with an error
-  pdf_object get(uint n, uint generation) const;
-  /// Allocate a new object number
-  uint allocate();
-  /// Append an updated object to the end of the document
-  void update(uint n, std::function<void()> fill);
-  /// Write an updated cross-reference table and trailer
-  void flush_updates();
-};
-
-// -------------------------------------------------------------------------------------------------
-
-/// If the object is an error, forward its message, otherwise return err.
-static std::string pdf_error(const pdf_object& o, const char* err) {
-  if (o.type != pdf_object::END || o.string.empty()) return err;
-  return o.string;
-}
-
-pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
-  if (stack.size() < 2)
-    return {pdf_object::END, "missing object ID pair"};
-
-  auto g = stack.back(); stack.pop_back();
-  auto n = stack.back(); stack.pop_back();
-  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
-   || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
-    return {pdf_object::END, "invalid object ID pair"};
-
-  pdf_object obj{pdf_object::OBJECT};
-  obj.n = n.number;
-  obj.generation = g.number;
-
-  while (1) {
-    auto object = parse(lex, obj.array);
-    if (object.type == pdf_object::END)
-      return {pdf_object::END, pdf_error(object, "object doesn't end")};
-    if (object.type == pdf_object::KEYWORD && object.string == "endobj")
-      break;
-    obj.array.push_back(std::move(object));
-  }
-  return obj;
-}
-
-pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
-  if (stack.size() < 2)
-    return {pdf_object::END, "missing reference ID pair"};
-
-  auto g = stack.back(); stack.pop_back();
-  auto n = stack.back(); stack.pop_back();
-  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
-   || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
-    return {pdf_object::END, "invalid reference ID pair"};
-
-  pdf_object ref{pdf_object::REFERENCE};
-  ref.n = n.number;
-  ref.generation = g.number;
-  return ref;
-}
-
-/// Read an object at the lexer's position.  Not a strict parser.
-pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
-  auto token = lex.next();
-  switch (token.type) {
-  case pdf_object::NL:
-  case pdf_object::COMMENT:
-    // These are not important to parsing, not even for this procedure's needs
-    return parse(lex, stack);
-  case pdf_object::B_ARRAY:
-  {
-    std::vector<pdf_object> array;
-    while (1) {
-      auto object = parse(lex, array);
-      if (object.type == pdf_object::END)
-        return {pdf_object::END, pdf_error(object, "array doesn't end")};
-      if (object.type == pdf_object::E_ARRAY)
-        break;
-      array.push_back(std::move(object));
-    }
-    return array;
-  }
-  case pdf_object::B_DICT:
-  {
-    std::vector<pdf_object> array;
-    while (1) {
-      auto object = parse(lex, array);
-      if (object.type == pdf_object::END)
-        return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
-      if (object.type == pdf_object::E_DICT)
-        break;
-      array.push_back(std::move(object));
-    }
-    if (array.size() % 2)
-      return {pdf_object::END, "unbalanced dictionary"};
-    std::map<std::string, pdf_object> dict;
-    for (size_t i = 0; i < array.size(); i += 2) {
-      if (array[i].type != pdf_object::NAME)
-        return {pdf_object::END, "invalid dictionary key type"};
-      dict.insert({array[i].string, std::move(array[i + 1])});
-    }
-    return dict;
-  }
-  case pdf_object::KEYWORD:
-    // Appears in the document body, typically needs to access the cross-reference table
-    // TODO use the xref to read /Length etc. once we actually need to read such objects;
-    //   presumably streams can use the pdf_object::string member
-    if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
-    if (token.string == "obj")    return parse_obj(lex, stack);
-    if (token.string == "R")      return parse_R(stack);
-    return token;
-  default:
-    return token;
-  }
-}
-
-std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
-  std::vector<pdf_object> throwaway_stack;
-  {
-    auto keyword = parse(lex, throwaway_stack);
-    if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
-      return "invalid xref table";
-  }
-  while (1) {
-    auto object = parse(lex, throwaway_stack);
-    if (object.type == pdf_object::END)
-      return pdf_error(object, "unexpected EOF while looking for the trailer");
-    if (object.type == pdf_object::KEYWORD && object.string == "trailer")
-      break;
-
-    auto second = parse(lex, throwaway_stack);
-    if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
-     || !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
-      return "invalid xref section header";
-
-    const size_t start = object.number;
-    const size_t count = second.number;
-    for (size_t i = 0; i < count; i++) {
-      auto off = parse(lex, throwaway_stack);
-      auto gen = parse(lex, throwaway_stack);
-      auto key = parse(lex, throwaway_stack);
-      if (!off.is_integer() || off.number < 0 || off.number > document.length()
-       || !gen.is_integer() || gen.number < 0 || gen.number > 65535
-       || key.type != pdf_object::KEYWORD)
-        return "invalid xref entry";
-
-      bool free = true;
-      if (key.string == "n")
-        free = false;
-      else if (key.string != "f")
-        return "invalid xref entry";
-
-      auto n = start + i;
-      if (loaded_entries.count(n))
-        continue;
-      if (n >= xref.size())
-        xref.resize(n + 1);
-      loaded_entries.insert(n);
-
-      auto& ref = xref[n];
-      ref.generation = gen.number;
-      ref.offset = off.number;
-      ref.free = free;
-    }
-  }
-  return "";
-}
-
-// -------------------------------------------------------------------------------------------------
-
-std::string pdf_updater::initialize() {
-  // We only need to look for startxref roughly within the last kibibyte of the document
-  static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
-  std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
-
-  std::smatch m;
-  if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
-    return "cannot find startxref";
-
-  size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
-  std::set<size_t> loaded_xrefs;
-  std::set<uint> loaded_entries;
-
-  std::vector<pdf_object> throwaway_stack;
-  while (1) {
-    if (loaded_xrefs.count(xref_offset))
-      return "circular xref offsets";
-    if (xref_offset >= document.length())
-      return "invalid xref offset";
-
-    pdf_lexer lex(document.c_str() + xref_offset);
-    auto err = load_xref(lex, loaded_entries);
-    if (!err.empty()) return err;
-
-    auto trailer = parse(lex, throwaway_stack);
-    if (trailer.type != pdf_object::DICT)
-      return pdf_error(trailer, "invalid trailer dictionary");
-    if (loaded_xrefs.empty())
-      this->trailer = trailer.dict;
-    loaded_xrefs.insert(xref_offset);
-
-    const auto prev_offset = trailer.dict.find("Prev");
-    if (prev_offset == trailer.dict.end())
-      break;
-    // FIXME we don't check for size_t over or underflow
-    if (!prev_offset->second.is_integer())
-      return "invalid Prev offset";
-    xref_offset = prev_offset->second.number;
-  }
-
-  trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
-  const auto last_size = trailer.find("Size");
-  if (last_size == trailer.end() || !last_size->second.is_integer() ||
-      last_size->second.number <= 0)
-    return "invalid or missing cross-reference table Size";
-
-  xref_size = last_size->second.number;
-  return "";
-}
-
-pdf_object pdf_updater::get(uint n, uint generation) const {
-  if (n >= xref_size)
-    return {pdf_object::NIL};
-
-  const auto& ref = xref[n];
-  if (ref.free || ref.generation != generation || ref.offset >= document.length())
-    return {pdf_object::NIL};
-
-  pdf_lexer lex(document.c_str() + ref.offset);
-  std::vector<pdf_object> stack;
-  while (1) {
-    auto object = parse(lex, stack);
-    if (object.type == pdf_object::END)
-      return object;
-    if (object.type != pdf_object::OBJECT)
-      stack.push_back(std::move(object));
-    else if (object.n != n || object.generation != generation)
-      return {pdf_object::END, "object mismatch"};
-    else
-      return std::move(object.array.at(0));
-  }
-}
-
-uint pdf_updater::allocate() {
-  assert(xref_size < UINT_MAX);
-
-  auto n = xref_size++;
-  if (xref.size() < xref_size)
-    xref.resize(xref_size);
-
-  // We don't make sure it gets a subsection in the update yet because we
-  // make no attempts at fixing the linked list of free items either
-  return n;
-}
-
-void pdf_updater::update(uint n, std::function<void()> fill) {
-  auto& ref = xref.at(n);
-  ref.offset = document.length() + 1;
-  ref.free = false;
-  updated.insert(n);
-
-  document += ssprintf("\n%u %u obj\n", n, ref.generation);
-  // Separately so that the callback can use document.length() to get the current offset
-  fill();
-  document += "\nendobj";
-}
-
-void pdf_updater::flush_updates() {
-  std::map<uint, size_t> groups;
-  for (auto i = updated.cbegin(); i != updated.cend(); ) {
-    size_t start = *i, count = 1;
-    while (++i != updated.cend() && *i == start + count)
-      count++;
-    groups[start] = count;
-  }
-
-  // Taking literally "Each cross-reference section begins with a line containing the keyword xref.
-  // Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
-  if (groups.empty())
-    groups[0] = 0;
-
-  auto startxref = document.length() + 1;
-  document += "\nxref\n";
-  for (const auto& g : groups) {
-    document += ssprintf("%u %zu\n", g.first, g.second);
-    for (size_t i = 0; i < g.second; i++) {
-      auto& ref = xref[g.first + i];
-      document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
-    }
-  }
-
-  trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
-  document += "trailer\n" + pdf_serialize(trailer)
-    + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// Make a PDF object representing the given point in time
-static pdf_object pdf_date(time_t timestamp) {
-  struct tm parts;
-  assert(localtime_r(&timestamp, &parts));
-
-  char buf[64];
-  assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
-
-  std::string offset = "Z";
-  auto offset_min = parts.tm_gmtoff / 60;
-  if (parts.tm_gmtoff < 0)
-    offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
-  if (parts.tm_gmtoff > 0)
-    offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
-  return {pdf_object::STRING, buf + offset};
-}
-
-static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
-  auto obj = pdf.get(node_n, node_generation);
-  if (obj.type != pdf_object::DICT)
-    return {pdf_object::NIL};
-
-  // Out of convenience; these aren't filled normally
-  obj.n = node_n;
-  obj.generation = node_generation;
-
-  auto type = obj.dict.find("Type");
-  if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
-    return {pdf_object::NIL};
-  if (type->second.string == "Page")
-    return obj;
-  if (type->second.string != "Pages")
-    return {pdf_object::NIL};
-
-  // XXX technically speaking, this may be an indirect reference.  The correct way to solve this
-  //   seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
-  auto kids = obj.dict.find("Kids");
-  if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
-   || kids->second.array.empty()
-   || kids->second.array.at(0).type != pdf_object::REFERENCE)
-    return {pdf_object::NIL};
-
-  // XXX nothing prevents us from recursing in an evil circular graph
-  return pdf_get_first_page(pdf, kids->second.array.at(0).n, kids->second.array.at(0).generation);
-}
-
-// -------------------------------------------------------------------------------------------------
-
-static std::string pkcs12_path, pkcs12_pass;
-
-// /All/ bytes are checked, except for the signature hexstring itself
-static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len) {
-  size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
-  if (pkcs12_path.empty())
-    return "undefined path to the signing key";
-
-  auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
-  if (!pkcs12_fp)
-    return pkcs12_path + ": " + strerror(errno);
-
-  // Abandon hope, all ye who enter OpenSSL!  Half of it is undocumented.
-  OpenSSL_add_all_algorithms();
-  ERR_load_crypto_strings();
-  ERR_clear_error();
-
-  PKCS12* p12 = nullptr;
-  EVP_PKEY* private_key = nullptr;
-  X509* certificate = nullptr;
-  STACK_OF(X509)* chain = nullptr;
-  PKCS7* p7 = nullptr;
-  int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
-  BIO* p7bio = nullptr;
-  unsigned char* buf = nullptr;
-
-  // OpenSSL error reasons will usually be of more value than any distinction I can come up with
-  std::string err = "OpenSSL failure";
-
-  if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
-   || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
-    err = pkcs12_path + ": parse failure";
-    goto error;
-  }
-  if (!private_key || !certificate) {
-    err = pkcs12_path + ": must contain a private key and a valid certificate chain";
-    goto error;
-  }
-  // Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
-  if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
-    err = "the certificate's key usage must include digital signatures or non-repudiation";
-    goto error;
-  }
-  if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
-    err = "the certificate's extended key usage must include S/MIME";
-    goto error;
-  }
-#if 0  // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
-  if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
-    err = "the certificate can't be used for S/MIME digital signatures";
-    goto error;
-  }
-#endif
-
-  // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
-  if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
-   || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
-    goto error;
-  // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
-  //   PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
-  for (int i = 0; i < sk_X509_num(chain); i++)
-    if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
-      goto error;
-
-  // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
-  // a copy of the whole document.  Hopefully this writes directly into a digest BIO.
-  if (!(p7bio = PKCS7_dataInit(p7, nullptr))
-   || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
-   || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
-   || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
-    goto error;
-
-#if 0
-  {
-    // Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
-    // Context: https://stackoverflow.com/a/29253469
-    auto fp = fopen("pdf_signature.pem", "wb");
-    assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
-  }
-#endif
-
-  if ((len = i2d_PKCS7(p7, &buf)) < 0)
-    goto error;
-  if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
-    // The obvious solution is to increase the allocation... or spend a week reading specifications
-    // while losing all faith in humanity as a species, and skip the PKCS7 API entirely
-    err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
-                   sign_len - 2, size_t(len) * 2);
-    goto error;
-  }
-  for (int i = 0; i < len; i++) {
-    document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
-    document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
-  }
-  err.clear();
-
-error:
-  OPENSSL_free(buf);
-  BIO_free_all(p7bio);
-  PKCS7_free(p7);
-  sk_X509_pop_free(chain, X509_free);
-  X509_free(certificate);
-  EVP_PKEY_free(private_key);
-  PKCS12_free(p12);
-
-  // In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
-  while (auto code = ERR_get_error())
-    if (auto reason = ERR_reason_error_string(code))
-      err = err + "; " + reason;
-
-  fclose(pkcs12_fp);
-  return err;
-}
-
-// -------------------------------------------------------------------------------------------------
-
-/// The presumption here is that the document is valid and that it doesn't employ cross-reference
-/// streams from PDF 1.5, or at least constitutes a hybrid-reference file.  The results with
-/// PDF 2.0 (2017) are currently unknown as the standard costs money.
-///
-/// Carelessly assumes that the version of the original document is at most PDF 1.6.
-///
-/// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
-/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
-/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
-static std::string pdf_sign(std::string& document) {
-  pdf_updater pdf(document);
-  auto err = pdf.initialize();
-  if (!err.empty())
-    return err;
-
-  auto root_ref = pdf.trailer.find("Root");
-  if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
-    return "trailer does not contain a reference to Root";
-  auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
-  if (root.type != pdf_object::DICT)
-    return "invalid Root dictionary reference";
-
-  // 8.7 Digital Signatures - /signature dictionary/
-  auto sigdict_n = pdf.allocate();
-  size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
-  pdf.update(sigdict_n, [&]{
-    // The timestamp is important for Adobe Acrobat Reader DC.  The ideal would be to use RFC 3161.
-    pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
-                        "   /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
-    byterange_off = pdf.document.size();
-    pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
-    pdf.document.append("\n   /Contents <");
-    sign_off = pdf.document.size();
-    pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
-    pdf.document.append("> >>");
-
-    // We actually need to exclude the hexstring quotes from signing
-    sign_off -= 1;
-    sign_len += 2;
-  });
-
-  // 8.6.3 Field Types - Signature Fields
-  pdf_object sigfield{pdf_object::DICT};
-  sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
-  sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
-  // 8.4.5 Annotations Types - Widget Annotations
-  // We can merge the Signature Annotation and omit Kids here
-  sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
-  sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
-  sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
-  sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
-    {pdf_object::NUMERIC, 0},
-    {pdf_object::NUMERIC, 0},
-    {pdf_object::NUMERIC, 0},
-    {pdf_object::NUMERIC, 0},
-  }}});
-
-  auto sigfield_n = pdf.allocate();
-  pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
-
-  auto pages_ref = root.dict.find("Pages");
-  if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
-    return "invalid Pages reference";
-  auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
-  if (page.type != pdf_object::DICT)
-    return "invalid or unsupported page tree";
-
-  // XXX assuming this won't be an indirectly referenced array
-  auto& annots = page.dict["Annots"];
-  if (annots.type != pdf_object::ARRAY)
-    annots = {pdf_object::ARRAY};
-  annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
-  pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
-
-  // 8.6.1 Interactive Form Dictionary
-  // XXX assuming there are no forms already, overwriting everything
-  root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
-    {"Fields", {std::vector<pdf_object>{
-      {pdf_object::REFERENCE, sigfield_n, 0}
-    }}},
-    {"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
-  }};
-
-  // Upgrade the document version for SHA-256 etc.
-  // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
-  //   version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
-  root.dict["Version"] = {pdf_object::NAME, "1.6"};
-  pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
-  pdf.flush_updates();
-
-  // Now that we know the length of everything, store byte ranges of what we're about to sign,
-  // which must be everything but the resulting signature itself
-  size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
-  auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
-  if (ranges.length() > byterange_len)
-    return "not enough space reserved for /ByteRange";
-  pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
-  return pdf_fill_in_signature(pdf.document, sign_off, sign_len);
-}
 
-// -------------------------------------------------------------------------------------------------
+#include "pdf-simple-sign.h"
 
 __attribute__((format(printf, 2, 3)))
 static void die(int status, const char* format, ...) {
   va_list ap;
   va_start(ap, format);
   if (isatty(fileno(stderr)))
-    vfprintf(stderr, ssprintf("\x1b[31m%s\x1b[0m\n", format).c_str(), ap);
+    vfprintf(stderr, ("\x1b[31m" + std::string(format) + "\x1b[0m\n").c_str(), ap);
   else
     vfprintf(stderr, format, ap);
   va_end(ap);
@@ -949,8 +72,6 @@ int main(int argc, char* argv[]) {
 
   const char* input_path  = argv[0];
   const char* output_path = argv[1];
-  pkcs12_path = argv[2];
-  pkcs12_pass = argv[3];
 
   std::string pdf_document;
   if (auto fp = fopen(input_path, "rb")) {
@@ -964,7 +85,7 @@ int main(int argc, char* argv[]) {
     die(1, "%s: %s", input_path, strerror(errno));
   }
 
-  auto err = pdf_sign(pdf_document);
+  auto err = pdf_simple_sign(pdf_document, argv[2], argv[3]);
   if (!err.empty()) {
     die(2, "Error: %s", err.c_str());
   }
diff --git a/pdf-simple-sign.h b/pdf-simple-sign.h
new file mode 100644
index 0000000..022731c
--- /dev/null
+++ b/pdf-simple-sign.h
@@ -0,0 +1,28 @@
+// vim: set sw=2 ts=2 sts=2 et tw=100:
+//
+// pdf-simple-sign: simple PDF signer
+//
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#pragma once
+
+#include <string>
+
+/// Sign basic PDF documents, as generated by e.g. Cairo, using the key-certificate pair
+/// stored in the PKCS#12 file named `pkcs12_path`, with password `pkcs12_pass`.
+/// Returns a non-empty error string on failure.
+std::string pdf_simple_sign(std::string& document,
+                            const std::string& pkcs12_path,
+                            const std::string& pkcs12_pass);
diff --git a/pdf.cpp b/pdf.cpp
new file mode 100644
index 0000000..867c3d4
--- /dev/null
+++ b/pdf.cpp
@@ -0,0 +1,906 @@
+// vim: set sw=2 ts=2 sts=2 et tw=100:
+//
+// pdf-simple-sign: simple PDF signer
+//
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
+#include <cmath>
+#undef NDEBUG
+#include <cassert>
+
+#include <vector>
+#include <map>
+#include <regex>
+#include <memory>
+#include <set>
+
+#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
+#error Need libstdc++ >= 4.9 for <regex>
+#endif
+
+#include <openssl/err.h>
+#include <openssl/x509v3.h>
+#include <openssl/pkcs12.h>
+
+#include "pdf-simple-sign.h"
+
+namespace {
+
+using uint = unsigned int;
+
+static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
+  std::string res;
+  if (v.empty())
+    return res;
+  for (const auto& s : v)
+    res += s + delim;
+  return res.substr(0, res.length() - delim.length());
+}
+
+template<typename... Args>
+static std::string ssprintf(const std::string& format, Args... args) {
+  size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args...);
+  return std::string(buf.get(), buf.get() + size - 1);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+/// PDF token/object thingy.  Objects may be composed either from one or a sequence of tokens.
+/// The PDF Reference doesn't actually speak of tokens.
+struct pdf_object {
+  enum type {
+    END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
+    // Simple tokens
+    B_ARRAY, E_ARRAY, B_DICT, E_DICT,
+    // Higher-level objects
+    ARRAY, DICT, OBJECT, REFERENCE,
+  } type = END;
+
+  std::string string;                      ///< END (error message), COMMENT/KEYWORD/NAME/STRING
+  double number = 0.;                      ///< BOOL, NUMERIC
+  std::vector<pdf_object> array;           ///< ARRAY, OBJECT
+  std::map<std::string, pdf_object> dict;  ///< DICT, in the future also STREAM
+  uint n = 0, generation = 0;              ///< OBJECT, REFERENCE
+
+  pdf_object(enum type type = END)                          : type(type) {}
+  pdf_object(enum type type, double v)                      : type(type), number(v) {}
+  pdf_object(enum type type, const std::string& v)          : type(type), string(v) {}
+  pdf_object(enum type type, uint n, uint g)                : type(type), n(n), generation(g) {}
+  pdf_object(const std::vector<pdf_object>& array)          : type(ARRAY), array(array) {}
+  pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}
+
+  pdf_object(const pdf_object&)            = default;
+  pdf_object(pdf_object&&)                 = default;
+  pdf_object& operator=(const pdf_object&) = default;
+  pdf_object& operator=(pdf_object&&)      = default;
+
+  /// Return whether this is a number without a fractional part
+  bool is_integer() const {
+    double tmp;
+    return type == NUMERIC && std::modf(number, &tmp) == 0.;
+  }
+};
+
+/// Basic lexical analyser for the Portable Document Format, giving limited error information
+struct pdf_lexer {
+  const unsigned char* p;
+  pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
+
+  static constexpr const char* oct_alphabet = "01234567";
+  static constexpr const char* dec_alphabet = "0123456789";
+  static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
+  static constexpr const char* whitespace = "\t\n\f\r ";
+  static constexpr const char* delimiters = "()<>[]{}/%";
+
+  bool eat_newline(int ch) {
+    if (ch == '\r') {
+      if (*p == '\n') p++;
+      return true;
+    }
+    return ch == '\n';
+  }
+
+  pdf_object string() {
+    std::string value;
+    int parens = 1;
+    while (1) {
+      if (!*p) return {pdf_object::END, "unexpected end of string"};
+      auto ch = *p++;
+      if (eat_newline(ch)) ch = '\n';
+      else if (ch == '(') { parens++; }
+      else if (ch == ')') { if (!--parens) break; }
+      else if (ch == '\\') {
+        if (!*p) return {pdf_object::END, "unexpected end of string"};
+        switch ((ch = *p++)) {
+        case 'n': ch = '\n'; break;
+        case 'r': ch = '\r'; break;
+        case 't': ch = '\t'; break;
+        case 'b': ch = '\b'; break;
+        case 'f': ch = '\f'; break;
+        default:
+          if (eat_newline(ch))
+            continue;
+          std::string octal;
+          if (ch && strchr(oct_alphabet, ch)) {
+            octal += ch;
+            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
+            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
+            ch = std::stoi(octal, nullptr, 8);
+          }
+        }
+      }
+      value += ch;
+    }
+    return {pdf_object::STRING, value};
+  }
+
+  pdf_object string_hex() {
+    std::string value, buf;
+    while (*p != '>') {
+      if (!*p) return {pdf_object::END, "unexpected end of hex string"};
+      if (!strchr(hex_alphabet, *p))
+        return {pdf_object::END, "invalid hex string"};
+      buf += *p++;
+      if (buf.size() == 2) {
+        value += char(std::stoi(buf, nullptr, 16));
+        buf.clear();
+      }
+    }
+    p++;
+    if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
+    return {pdf_object::STRING, value};
+  }
+
+  pdf_object name() {
+    std::string value;
+    while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
+      auto ch = *p++;
+      if (ch == '#') {
+        std::string hexa;
+        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
+        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
+        if (hexa.size() != 2)
+          return {pdf_object::END, "invalid name hexa escape"};
+        ch = char(std::stoi(hexa, nullptr, 16));
+      }
+      value += ch;
+    }
+    if (value.empty()) return {pdf_object::END, "unexpected end of name"};
+    return {pdf_object::NAME, value};
+  }
+
+  pdf_object comment() {
+    std::string value;
+    while (*p && *p != '\r' && *p != '\n')
+      value += *p++;
+    return {pdf_object::COMMENT, value};
+  }
+
+  // XXX maybe invalid numbers should rather be interpreted as keywords
+  pdf_object number() {
+    std::string value;
+    if (*p == '-')
+      value += *p++;
+    bool real = false, digits = false;
+    while (*p) {
+      if (strchr(dec_alphabet, *p))
+        digits = true;
+      else if (*p == '.' && !real)
+        real = true;
+      else
+        break;
+      value += *p++;
+    }
+    if (!digits) return {pdf_object::END, "invalid number"};
+    return {pdf_object::NUMERIC, std::stod(value, nullptr)};
+  }
+
+  pdf_object next() {
+    if (!*p)
+      return {pdf_object::END};
+    if (strchr("-0123456789.", *p))
+      return number();
+
+    // {} end up being keywords, we might want to error out on those
+    std::string value;
+    while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
+      value += *p++;
+    if (!value.empty()) {
+      if (value == "null")  return {pdf_object::NIL};
+      if (value == "true")  return {pdf_object::BOOL, 1};
+      if (value == "false") return {pdf_object::BOOL, 0};
+      return {pdf_object::KEYWORD, value};
+    }
+
+    switch (char ch = *p++) {
+    case '/': return name();
+    case '%': return comment();
+    case '(': return string();
+    case '[': return {pdf_object::B_ARRAY};
+    case ']': return {pdf_object::E_ARRAY};
+    case '<':
+      if (*p++ == '<')
+        return {pdf_object::B_DICT};
+      p--;
+      return string_hex();
+    case '>':
+      if (*p++ == '>')
+        return {pdf_object::E_DICT};
+      p--;
+      return {pdf_object::END, "unexpected '>'"};
+    default:
+      if (eat_newline(ch))
+        return {pdf_object::NL};
+      if (strchr(whitespace, ch))
+        return next();
+      return {pdf_object::END, "unexpected input"};
+    }
+  }
+};
+
+// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
+static std::string pdf_serialize(const pdf_object& o) {
+  switch (o.type) {
+  case pdf_object::NL:      return "\n";
+  case pdf_object::NIL:     return "null";
+  case pdf_object::BOOL:    return o.number ? "true" : "false";
+  case pdf_object::NUMERIC:
+  {
+    if (o.is_integer()) return std::to_string((long long) o.number);
+    return std::to_string(o.number);
+  }
+  case pdf_object::KEYWORD: return o.string;
+  case pdf_object::NAME:
+  {
+    std::string escaped = "/";
+    for (char c : o.string) {
+      if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
+        escaped += ssprintf("#%02x", c);
+      else
+        escaped += c;
+    }
+    return escaped;
+  }
+  case pdf_object::STRING:
+  {
+    std::string escaped;
+    for (char c : o.string) {
+      if (c == '\\' || c == '(' || c == ')')
+        escaped += '\\';
+      escaped += c;
+    }
+    return "(" + escaped + ")";
+  }
+  case pdf_object::B_ARRAY: return "[";
+  case pdf_object::E_ARRAY: return "]";
+  case pdf_object::B_DICT:  return "<<";
+  case pdf_object::E_DICT:  return ">>";
+  case pdf_object::ARRAY:
+  {
+    std::vector<std::string> v;
+    for (const auto& i : o.array)
+      v.push_back(pdf_serialize(i));
+    return "[ " + concatenate(v, " ") + " ]";
+  }
+  case pdf_object::DICT:
+  {
+    std::string s;
+    for (const auto& i : o.dict)
+      // FIXME the key is also supposed to be escaped by pdf_serialize()
+      s += " /" + i.first + " " + pdf_serialize(i.second);
+    return "<<" + s + " >>";
+  }
+  case pdf_object::OBJECT:
+    return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
+  case pdf_object::REFERENCE:
+    return ssprintf("%u %u R", o.n, o.generation);
+  default:
+    assert(!"unsupported token for serialization");
+  }
+}
+
+// -------------------------------------------------------------------------------------------------
+
+/// Utility class to help read and possibly incrementally update PDF files
+class pdf_updater {
+  struct ref {
+    size_t offset = 0;     ///< File offset or N of the next free entry
+    uint generation = 0;   ///< Object generation
+    bool free = true;      ///< Whether this N has been deleted
+  };
+
+  std::vector<ref> xref;   ///< Cross-reference table
+  size_t xref_size = 0;    ///< Current cross-reference table size, correlated to xref.size()
+  std::set<uint> updated;  ///< List of updated objects
+
+  pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
+  pdf_object parse_R(std::vector<pdf_object>& stack) const;
+  pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
+  std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
+
+public:
+  /// The new trailer dictionary to be written, initialized with the old one
+  std::map<std::string, pdf_object> trailer;
+
+  std::string& document;
+  pdf_updater(std::string& document) : document(document) {}
+
+  /// Build the cross-reference table and prepare a new trailer dictionary
+  std::string initialize();
+  /// Retrieve an object by its number and generation -- may return NIL or END with an error
+  pdf_object get(uint n, uint generation) const;
+  /// Allocate a new object number
+  uint allocate();
+  /// Append an updated object to the end of the document
+  void update(uint n, std::function<void()> fill);
+  /// Write an updated cross-reference table and trailer
+  void flush_updates();
+};
+
+// -------------------------------------------------------------------------------------------------
+
+/// If the object is an error, forward its message, otherwise return err.
+static std::string pdf_error(const pdf_object& o, const char* err) {
+  if (o.type != pdf_object::END || o.string.empty()) return err;
+  return o.string;
+}
+
+pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
+  if (stack.size() < 2)
+    return {pdf_object::END, "missing object ID pair"};
+
+  auto g = stack.back(); stack.pop_back();
+  auto n = stack.back(); stack.pop_back();
+  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
+   || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
+    return {pdf_object::END, "invalid object ID pair"};
+
+  pdf_object obj{pdf_object::OBJECT};
+  obj.n = n.number;
+  obj.generation = g.number;
+
+  while (1) {
+    auto object = parse(lex, obj.array);
+    if (object.type == pdf_object::END)
+      return {pdf_object::END, pdf_error(object, "object doesn't end")};
+    if (object.type == pdf_object::KEYWORD && object.string == "endobj")
+      break;
+    obj.array.push_back(std::move(object));
+  }
+  return obj;
+}
+
+pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
+  if (stack.size() < 2)
+    return {pdf_object::END, "missing reference ID pair"};
+
+  auto g = stack.back(); stack.pop_back();
+  auto n = stack.back(); stack.pop_back();
+  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
+   || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
+    return {pdf_object::END, "invalid reference ID pair"};
+
+  pdf_object ref{pdf_object::REFERENCE};
+  ref.n = n.number;
+  ref.generation = g.number;
+  return ref;
+}
+
+/// Read an object at the lexer's position.  Not a strict parser.
+pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
+  auto token = lex.next();
+  switch (token.type) {
+  case pdf_object::NL:
+  case pdf_object::COMMENT:
+    // These are not important to parsing, not even for this procedure's needs
+    return parse(lex, stack);
+  case pdf_object::B_ARRAY:
+  {
+    std::vector<pdf_object> array;
+    while (1) {
+      auto object = parse(lex, array);
+      if (object.type == pdf_object::END)
+        return {pdf_object::END, pdf_error(object, "array doesn't end")};
+      if (object.type == pdf_object::E_ARRAY)
+        break;
+      array.push_back(std::move(object));
+    }
+    return array;
+  }
+  case pdf_object::B_DICT:
+  {
+    std::vector<pdf_object> array;
+    while (1) {
+      auto object = parse(lex, array);
+      if (object.type == pdf_object::END)
+        return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
+      if (object.type == pdf_object::E_DICT)
+        break;
+      array.push_back(std::move(object));
+    }
+    if (array.size() % 2)
+      return {pdf_object::END, "unbalanced dictionary"};
+    std::map<std::string, pdf_object> dict;
+    for (size_t i = 0; i < array.size(); i += 2) {
+      if (array[i].type != pdf_object::NAME)
+        return {pdf_object::END, "invalid dictionary key type"};
+      dict.insert({array[i].string, std::move(array[i + 1])});
+    }
+    return dict;
+  }
+  case pdf_object::KEYWORD:
+    // Appears in the document body, typically needs to access the cross-reference table
+    // TODO use the xref to read /Length etc. once we actually need to read such objects;
+    //   presumably streams can use the pdf_object::string member
+    if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
+    if (token.string == "obj")    return parse_obj(lex, stack);
+    if (token.string == "R")      return parse_R(stack);
+    return token;
+  default:
+    return token;
+  }
+}
+
+std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
+  std::vector<pdf_object> throwaway_stack;
+  {
+    auto keyword = parse(lex, throwaway_stack);
+    if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
+      return "invalid xref table";
+  }
+  while (1) {
+    auto object = parse(lex, throwaway_stack);
+    if (object.type == pdf_object::END)
+      return pdf_error(object, "unexpected EOF while looking for the trailer");
+    if (object.type == pdf_object::KEYWORD && object.string == "trailer")
+      break;
+
+    auto second = parse(lex, throwaway_stack);
+    if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
+     || !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
+      return "invalid xref section header";
+
+    const size_t start = object.number;
+    const size_t count = second.number;
+    for (size_t i = 0; i < count; i++) {
+      auto off = parse(lex, throwaway_stack);
+      auto gen = parse(lex, throwaway_stack);
+      auto key = parse(lex, throwaway_stack);
+      if (!off.is_integer() || off.number < 0 || off.number > document.length()
+       || !gen.is_integer() || gen.number < 0 || gen.number > 65535
+       || key.type != pdf_object::KEYWORD)
+        return "invalid xref entry";
+
+      bool free = true;
+      if (key.string == "n")
+        free = false;
+      else if (key.string != "f")
+        return "invalid xref entry";
+
+      auto n = start + i;
+      if (loaded_entries.count(n))
+        continue;
+      if (n >= xref.size())
+        xref.resize(n + 1);
+      loaded_entries.insert(n);
+
+      auto& ref = xref[n];
+      ref.generation = gen.number;
+      ref.offset = off.number;
+      ref.free = free;
+    }
+  }
+  return "";
+}
+
+// -------------------------------------------------------------------------------------------------
+
+std::string pdf_updater::initialize() {
+  // We only need to look for startxref roughly within the last kibibyte of the document
+  static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
+  std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
+
+  std::smatch m;
+  if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
+    return "cannot find startxref";
+
+  size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
+  std::set<size_t> loaded_xrefs;
+  std::set<uint> loaded_entries;
+
+  std::vector<pdf_object> throwaway_stack;
+  while (1) {
+    if (loaded_xrefs.count(xref_offset))
+      return "circular xref offsets";
+    if (xref_offset >= document.length())
+      return "invalid xref offset";
+
+    pdf_lexer lex(document.c_str() + xref_offset);
+    auto err = load_xref(lex, loaded_entries);
+    if (!err.empty()) return err;
+
+    auto trailer = parse(lex, throwaway_stack);
+    if (trailer.type != pdf_object::DICT)
+      return pdf_error(trailer, "invalid trailer dictionary");
+    if (loaded_xrefs.empty())
+      this->trailer = trailer.dict;
+    loaded_xrefs.insert(xref_offset);
+
+    const auto prev_offset = trailer.dict.find("Prev");
+    if (prev_offset == trailer.dict.end())
+      break;
+    // FIXME we don't check for size_t over or underflow
+    if (!prev_offset->second.is_integer())
+      return "invalid Prev offset";
+    xref_offset = prev_offset->second.number;
+  }
+
+  trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
+  const auto last_size = trailer.find("Size");
+  if (last_size == trailer.end() || !last_size->second.is_integer() ||
+      last_size->second.number <= 0)
+    return "invalid or missing cross-reference table Size";
+
+  xref_size = last_size->second.number;
+  return "";
+}
+
+pdf_object pdf_updater::get(uint n, uint generation) const {
+  if (n >= xref_size)
+    return {pdf_object::NIL};
+
+  const auto& ref = xref[n];
+  if (ref.free || ref.generation != generation || ref.offset >= document.length())
+    return {pdf_object::NIL};
+
+  pdf_lexer lex(document.c_str() + ref.offset);
+  std::vector<pdf_object> stack;
+  while (1) {
+    auto object = parse(lex, stack);
+    if (object.type == pdf_object::END)
+      return object;
+    if (object.type != pdf_object::OBJECT)
+      stack.push_back(std::move(object));
+    else if (object.n != n || object.generation != generation)
+      return {pdf_object::END, "object mismatch"};
+    else
+      return std::move(object.array.at(0));
+  }
+}
+
+uint pdf_updater::allocate() {
+  assert(xref_size < UINT_MAX);
+
+  auto n = xref_size++;
+  if (xref.size() < xref_size)
+    xref.resize(xref_size);
+
+  // We don't make sure it gets a subsection in the update yet because we
+  // make no attempts at fixing the linked list of free items either
+  return n;
+}
+
+void pdf_updater::update(uint n, std::function<void()> fill) {
+  auto& ref = xref.at(n);
+  ref.offset = document.length() + 1;
+  ref.free = false;
+  updated.insert(n);
+
+  document += ssprintf("\n%u %u obj\n", n, ref.generation);
+  // Separately so that the callback can use document.length() to get the current offset
+  fill();
+  document += "\nendobj";
+}
+
+void pdf_updater::flush_updates() {
+  std::map<uint, size_t> groups;
+  for (auto i = updated.cbegin(); i != updated.cend(); ) {
+    size_t start = *i, count = 1;
+    while (++i != updated.cend() && *i == start + count)
+      count++;
+    groups[start] = count;
+  }
+
+  // Taking literally "Each cross-reference section begins with a line containing the keyword xref.
+  // Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
+  if (groups.empty())
+    groups[0] = 0;
+
+  auto startxref = document.length() + 1;
+  document += "\nxref\n";
+  for (const auto& g : groups) {
+    document += ssprintf("%u %zu\n", g.first, g.second);
+    for (size_t i = 0; i < g.second; i++) {
+      auto& ref = xref[g.first + i];
+      document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
+    }
+  }
+
+  trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
+  document += "trailer\n" + pdf_serialize(trailer)
+    + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+/// Make a PDF object representing the given point in time
+static pdf_object pdf_date(time_t timestamp) {
+  struct tm parts;
+  assert(localtime_r(&timestamp, &parts));
+
+  char buf[64];
+  assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
+
+  std::string offset = "Z";
+  auto offset_min = parts.tm_gmtoff / 60;
+  if (parts.tm_gmtoff < 0)
+    offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
+  if (parts.tm_gmtoff > 0)
+    offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
+  return {pdf_object::STRING, buf + offset};
+}
+
+static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
+  auto obj = pdf.get(node_n, node_generation);
+  if (obj.type != pdf_object::DICT)
+    return {pdf_object::NIL};
+
+  // Out of convenience; these aren't filled normally
+  obj.n = node_n;
+  obj.generation = node_generation;
+
+  auto type = obj.dict.find("Type");
+  if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
+    return {pdf_object::NIL};
+  if (type->second.string == "Page")
+    return obj;
+  if (type->second.string != "Pages")
+    return {pdf_object::NIL};
+
+  // XXX technically speaking, this may be an indirect reference.  The correct way to solve this
+  //   seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
+  auto kids = obj.dict.find("Kids");
+  if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
+   || kids->second.array.empty()
+   || kids->second.array.at(0).type != pdf_object::REFERENCE)
+    return {pdf_object::NIL};
+
+  // XXX nothing prevents us from recursing in an evil circular graph
+  return pdf_get_first_page(pdf, kids->second.array.at(0).n, kids->second.array.at(0).generation);
+}
+
+// -------------------------------------------------------------------------------------------------
+
+// /All/ bytes are checked, except for the signature hexstring itself
+static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len,
+                                         const std::string& pkcs12_path,
+                                         const std::string& pkcs12_pass) {
+  size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
+  if (pkcs12_path.empty())
+    return "undefined path to the signing key";
+
+  auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
+  if (!pkcs12_fp)
+    return pkcs12_path + ": " + strerror(errno);
+
+  // Abandon hope, all ye who enter OpenSSL!  Half of it is undocumented.
+  OpenSSL_add_all_algorithms();
+  ERR_load_crypto_strings();
+  ERR_clear_error();
+
+  PKCS12* p12 = nullptr;
+  EVP_PKEY* private_key = nullptr;
+  X509* certificate = nullptr;
+  STACK_OF(X509)* chain = nullptr;
+  PKCS7* p7 = nullptr;
+  int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
+  BIO* p7bio = nullptr;
+  unsigned char* buf = nullptr;
+
+  // OpenSSL error reasons will usually be of more value than any distinction I can come up with
+  std::string err = "OpenSSL failure";
+
+  if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
+   || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
+    err = pkcs12_path + ": parse failure";
+    goto error;
+  }
+  if (!private_key || !certificate) {
+    err = pkcs12_path + ": must contain a private key and a valid certificate chain";
+    goto error;
+  }
+  // Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
+  if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
+    err = "the certificate's key usage must include digital signatures or non-repudiation";
+    goto error;
+  }
+  if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
+    err = "the certificate's extended key usage must include S/MIME";
+    goto error;
+  }
+#if 0  // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
+  if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
+    err = "the certificate can't be used for S/MIME digital signatures";
+    goto error;
+  }
+#endif
+
+  // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
+  if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
+   || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
+    goto error;
+  // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
+  //   PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
+  for (int i = 0; i < sk_X509_num(chain); i++)
+    if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
+      goto error;
+
+  // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
+  // a copy of the whole document.  Hopefully this writes directly into a digest BIO.
+  if (!(p7bio = PKCS7_dataInit(p7, nullptr))
+   || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
+   || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
+   || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
+    goto error;
+
+#if 0
+  {
+    // Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
+    // Context: https://stackoverflow.com/a/29253469
+    auto fp = fopen("pdf_signature.pem", "wb");
+    assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
+  }
+#endif
+
+  if ((len = i2d_PKCS7(p7, &buf)) < 0)
+    goto error;
+  if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
+    // The obvious solution is to increase the allocation... or spend a week reading specifications
+    // while losing all faith in humanity as a species, and skip the PKCS7 API entirely
+    err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
+                   sign_len - 2, size_t(len) * 2);
+    goto error;
+  }
+  for (int i = 0; i < len; i++) {
+    document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
+    document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
+  }
+  err.clear();
+
+error:
+  OPENSSL_free(buf);
+  BIO_free_all(p7bio);
+  PKCS7_free(p7);
+  sk_X509_pop_free(chain, X509_free);
+  X509_free(certificate);
+  EVP_PKEY_free(private_key);
+  PKCS12_free(p12);
+
+  // In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
+  while (auto code = ERR_get_error())
+    if (auto reason = ERR_reason_error_string(code))
+      err = err + "; " + reason;
+
+  fclose(pkcs12_fp);
+  return err;
+}
+
+}  // anonymous namespace
+
+// -------------------------------------------------------------------------------------------------
+
+// The presumption here is that the document is valid and that it doesn't employ cross-reference
+// streams from PDF 1.5, or at least constitutes a hybrid-reference file.  The results with
+// PDF 2.0 (2017) are currently unknown as the standard costs money.
+//
+// Carelessly assumes that the version of the original document is at most PDF 1.6.
+//
+// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
+// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
+// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
+std::string pdf_simple_sign(std::string& document,
+                            const std::string& pkcs12_path,
+                            const std::string& pkcs12_pass) {
+  pdf_updater pdf(document);
+  auto err = pdf.initialize();
+  if (!err.empty())
+    return err;
+
+  auto root_ref = pdf.trailer.find("Root");
+  if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
+    return "trailer does not contain a reference to Root";
+  auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
+  if (root.type != pdf_object::DICT)
+    return "invalid Root dictionary reference";
+
+  // 8.7 Digital Signatures - /signature dictionary/
+  auto sigdict_n = pdf.allocate();
+  size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
+  pdf.update(sigdict_n, [&]{
+    // The timestamp is important for Adobe Acrobat Reader DC.  The ideal would be to use RFC 3161.
+    pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
+                        "   /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
+    byterange_off = pdf.document.size();
+    pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
+    pdf.document.append("\n   /Contents <");
+    sign_off = pdf.document.size();
+    pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
+    pdf.document.append("> >>");
+
+    // We actually need to exclude the hexstring quotes from signing
+    sign_off -= 1;
+    sign_len += 2;
+  });
+
+  // 8.6.3 Field Types - Signature Fields
+  pdf_object sigfield{pdf_object::DICT};
+  sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
+  sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
+  // 8.4.5 Annotations Types - Widget Annotations
+  // We can merge the Signature Annotation and omit Kids here
+  sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
+  sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
+  sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
+  sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
+    {pdf_object::NUMERIC, 0},
+    {pdf_object::NUMERIC, 0},
+    {pdf_object::NUMERIC, 0},
+    {pdf_object::NUMERIC, 0},
+  }}});
+
+  auto sigfield_n = pdf.allocate();
+  pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
+
+  auto pages_ref = root.dict.find("Pages");
+  if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
+    return "invalid Pages reference";
+  auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
+  if (page.type != pdf_object::DICT)
+    return "invalid or unsupported page tree";
+
+  // XXX assuming this won't be an indirectly referenced array
+  auto& annots = page.dict["Annots"];
+  if (annots.type != pdf_object::ARRAY)
+    annots = {pdf_object::ARRAY};
+  annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
+  pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
+
+  // 8.6.1 Interactive Form Dictionary
+  // XXX assuming there are no forms already, overwriting everything
+  root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
+    {"Fields", {std::vector<pdf_object>{
+      {pdf_object::REFERENCE, sigfield_n, 0}
+    }}},
+    {"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
+  }};
+
+  // Upgrade the document version for SHA-256 etc.
+  // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
+  //   version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
+  root.dict["Version"] = {pdf_object::NAME, "1.6"};
+  pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
+  pdf.flush_updates();
+
+  // Now that we know the length of everything, store byte ranges of what we're about to sign,
+  // which must be everything but the resulting signature itself
+  size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
+  auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
+  if (ranges.length() > byterange_len)
+    return "not enough space reserved for /ByteRange";
+  pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
+  return pdf_fill_in_signature(pdf.document, sign_off, sign_len, pkcs12_path, pkcs12_pass);
+}
-- 
cgit v1.2.3-70-g09d2