-rw-r--r-- | .clang-format | 8
-rw-r--r-- | .gitignore | 8
-rw-r--r-- | LICENSE | 2
-rw-r--r-- | README.adoc | 46
-rw-r--r-- | cmd/extfs-pdf/main.go | 141
-rw-r--r-- | go.mod | 6
-rw-r--r-- | go.sum | 8
-rw-r--r-- | lpg/.clang-format | 9
-rw-r--r-- | lpg/lpg.cpp | 1160
-rw-r--r-- | lpg/lpg.lua | 240
-rw-r--r-- | lpg/meson.build | 24
-rw-r--r-- | lpg/subprojects/lua++.wrap | 10
-rw-r--r-- | lpg/subprojects/packagefiles/lua-5.4.7/LICENSE.build | 20
-rw-r--r-- | lpg/subprojects/packagefiles/lua-5.4.7/meson.build | 50
-rw-r--r-- | lpg/subprojects/packagefiles/lua-5.4.7/meson_options.txt | 4
-rw-r--r-- | lpg/subprojects/packagefiles/lua-5.4.7/onelua.cpp | 1
-rw-r--r-- | meson.build | 12
-rw-r--r-- | pdf-simple-sign.adoc | 2
-rw-r--r-- | pdf-simple-sign.cpp | 98
-rw-r--r-- | pdf/pdf.go | 554
-rwxr-xr-x | test.sh | 23
21 files changed, 2281 insertions, 145 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..13cbee9
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,8 @@
+BasedOnStyle: Chromium
+ColumnLimit: 100
+IndentCaseLabels: false
+AccessModifierOffset: -2
+ContinuationIndentWidth: 2
+SpaceAfterTemplateKeyword: false
+SpaceAfterCStyleCast: true
+SpacesBeforeTrailingComments: 2
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d046c48
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+/builddir
+/pdf-simple-sign.cflags
+/pdf-simple-sign.config
+/pdf-simple-sign.creator
+/pdf-simple-sign.creator.user
+/pdf-simple-sign.cxxflags
+/pdf-simple-sign.files
+/pdf-simple-sign.includes
diff --git a/LICENSE b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+Copyright (c) 2017 - 2025, Přemysl Eric Janouch <p@janouch.name>
 
 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.
diff --git a/README.adoc b/README.adoc
index 56eecb8..674594f 100644
--- a/README.adoc
+++ b/README.adoc
@@ -2,16 +2,26 @@ pdf-simple-sign
 ===============
 'pdf-simple-sign' is a simple PDF signer intended for documents produced by
-the Cairo library, GNU troff, ImageMagick, or similar.
+the Cairo library (≤ 1.17.4 or using PDF 1.4), GNU troff, ImageMagick,
+or similar.
 
 I don't aim to extend the functionality any further. The project is fairly
 self-contained and it should be easy to grasp and change to suit to your needs.
 
+Packages
+--------
+Regular releases are sporadic. git master should be stable enough.
+You can get a package with the latest development version using Arch Linux's
+https://aur.archlinux.org/packages/pdf-simple-sign-git[AUR],
+or as a https://git.janouch.name/p/nixexprs[Nix derivation].
+
 Documentation
 -------------
 See the link:pdf-simple-sign.adoc[man page] for information about usage.
 The rest of this README will concern itself with externalities.
 
+image:https://pkg.go.dev/badge/janouch.name/pdf-simple-sign@master/pdf["PkgGoDev", link="https://pkg.go.dev/janouch.name/pdf-simple-sign@master/pdf"]
+
 Building
 --------
 Build dependencies: Meson, Asciidoctor, a C++11 compiler, pkg-config +
@@ -23,9 +33,39 @@ Runtime dependencies: libcrypto (OpenSSL 1.1 API)
  $ cd builddir
  $ ninja
 
-In addition to the C++ version, also included is a native Go port:
+Go
+~~
+In addition to the C++ version, also included is a native Go port,
+which has enhanced PDF 1.5 support:
+
+----
+$ go install janouch.name/pdf-simple-sign/cmd/pdf-simple-sign@master
+----
+
+and a crude external VFS for Midnight Commander, that may be used to extract
+all streams from a given PDF file:
+
+----
+$ GOBIN=$HOME/.local/share/mc/extfs.d \
+ go install janouch.name/pdf-simple-sign/cmd/extfs-pdf@master
+----
+
+To enable the VFS, edit your _~/.config/mc/mc.ext.ini_ to contain:
+
+----
+[pdf]
+Type=^PDF
+Open=%cd %p/extfs-pdf://
+----
+
+Lua PDF generator
+~~~~~~~~~~~~~~~~~
+Build dependencies: Meson, a C++17 compiler, pkg-config +
+Runtime dependencies: C++ Lua >= 5.3 (custom Meson wrap fallback),
+ cairo >= 1.15.4, pangocairo, libqrencode
 
- $ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign
+This is a parasitic subproject located in the _lpg_ subdirectory.
+It will generate its own documentation.
Contributing and Support ------------------------ diff --git a/cmd/extfs-pdf/main.go b/cmd/extfs-pdf/main.go new file mode 100644 index 0000000..eab3e2b --- /dev/null +++ b/cmd/extfs-pdf/main.go @@ -0,0 +1,141 @@ +// +// Copyright (c) 2021 - 2024, Přemysl Eric Janouch <p@janouch.name> +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +// extfs-pdf is an external VFS plugin for Midnight Commander. +// More serious image extractors should rewrite this to use pdfimages(1). +package main + +import ( + "flag" + "fmt" + "os" + "time" + + "janouch.name/pdf-simple-sign/pdf" +) + +func die(status int, format string, args ...interface{}) { + os.Stderr.WriteString(fmt.Sprintf(format+"\n", args...)) + os.Exit(status) +} + +func usage() { + die(1, "Usage: %s [-h] COMMAND DOCUMENT [ARG...]", os.Args[0]) +} + +func streamSuffix(o *pdf.Object) string { + if filter, _ := o.Dict["Filter"]; filter.Kind == pdf.Name { + switch filter.String { + case "JBIG2Decode": + // This is the file extension used by pdfimages(1). + // This is not a complete JBIG2 standalone file. + return "jb2e" + case "JPXDecode": + return "jp2" + case "DCTDecode": + return "jpg" + case "FlateDecode": + return "zz" + default: + return filter.String + } + } + return "stream" +} + +func list(mtime time.Time, updater *pdf.Updater) { + stamp := mtime.Local().Format("01-02-2006 15:04:05") + for _, o := range updater.ListIndirect() { + object, err := updater.Get(o.N, o.Generation) + size := 0 + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + } else { + // Accidental transformation, retrieving original data is more work. 
+ size = len(object.Serialize()) + } + fmt.Printf("-r--r--r-- 1 0 0 %d %s n%dg%d\n", + size, stamp, o.N, o.Generation) + if object.Kind == pdf.Stream { + fmt.Printf("-r--r--r-- 1 0 0 %d %s n%dg%d.%s\n", len(object.Stream), + stamp, o.N, o.Generation, streamSuffix(&object)) + } + } +} + +func copyout(updater *pdf.Updater, storedFilename, extractTo string) { + var ( + n, generation uint + suffix string + ) + m, err := fmt.Sscanf(storedFilename, "n%dg%d%s", &n, &generation, &suffix) + if m < 2 { + die(3, "%s: %s", storedFilename, err) + } + + object, err := updater.Get(n, generation) + if err != nil { + die(3, "%s: %s", storedFilename, err) + } + + content := []byte(object.Serialize()) + if suffix != "" { + content = object.Stream + } + if err = os.WriteFile(extractTo, content, 0666); err != nil { + die(3, "%s", err) + } +} + +func main() { + flag.Usage = usage + flag.Parse() + if flag.NArg() < 2 { + usage() + } + + command, documentPath := flag.Arg(0), flag.Arg(1) + doc, err := os.ReadFile(documentPath) + if err != nil { + die(1, "%s", err) + } + + mtime := time.UnixMilli(0) + if info, err := os.Stat(documentPath); err == nil { + mtime = info.ModTime() + } + + updater, err := pdf.NewUpdater(doc) + if err != nil { + die(2, "%s", err) + } + + switch command { + default: + die(1, "unsupported command: %s", command) + case "list": + if flag.NArg() != 2 { + usage() + } else { + list(mtime, updater) + } + case "copyout": + if flag.NArg() != 4 { + usage() + } else { + copyout(updater, flag.Arg(2), flag.Arg(3)) + } + } +} @@ -1,8 +1,8 @@ module janouch.name/pdf-simple-sign -go 1.14 +go 1.17 require ( - go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 - golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de + go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 + golang.org/x/crypto v0.10.0 ) @@ -1,12 +1,12 @@ -go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8 h1:W3oGFPlHBLgXdsbPVixWFMYsuPhm81/Qww3XAgBbn/0= -go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8/go.mod h1:5fWP3IVYEMc04wC+lMJAfkmNmKAl2P1swVv8VS+URZ8= go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M= go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= -golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85 h1:et7+NAX3lLIk5qUCTA9QelBjGE/NkhzYw/mhnr0s7nI= -golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 h1:CCriYyAfq1Br1aIYettdHZTy8mBTIPo7We18TuO/bak= +go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig= golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= +golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/lpg/.clang-format 
b/lpg/.clang-format new file mode 100644 index 0000000..339b7e1 --- /dev/null +++ b/lpg/.clang-format @@ -0,0 +1,9 @@ +BasedOnStyle: LLVM +ColumnLimit: 80 +IndentWidth: 4 +TabWidth: 4 +UseTab: ForContinuationAndIndentation +SpaceAfterCStyleCast: true +AlignAfterOpenBracket: DontAlign +AlignOperands: DontAlign +SpacesBeforeTrailingComments: 2 diff --git a/lpg/lpg.cpp b/lpg/lpg.cpp new file mode 100644 index 0000000..341a104 --- /dev/null +++ b/lpg/lpg.cpp @@ -0,0 +1,1160 @@ +// +// lpg: Lua PDF generator +// +// Copyright (c) 2017 - 2025, Přemysl Eric Janouch <p@janouch.name> +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> + +#include <cairo.h> +#include <cairo-pdf.h> +#include <pango/pangocairo.h> + +#include <qrencode.h> + +#include <algorithm> +#include <fstream> +#include <functional> +#include <iostream> +#include <memory> +#include <optional> +#include <sstream> +#include <string> +#include <tuple> +#include <unordered_map> +#include <variant> +#include <vector> + +#include <arpa/inet.h> + +using namespace std; +using attribute = variant<string, double>; + +#define DefWidget(name) struct name : public Widget +struct Widget { + virtual ~Widget() {} + + unordered_map<string, attribute> attributes; + using attribute_map = decltype(attributes); + Widget *setattr(string key, attribute_map::mapped_type value) { + attributes.insert({key, value}); + return this; + } + + optional<attribute> getattr(const string &name) { + if (auto it = attributes.find("_" + name); it != attributes.end()) + return {it->second}; + if (auto it = attributes.find(name); it != attributes.end()) + return {it->second}; + return {}; + } + + /// Top-down attribute propagation. + virtual void apply_attributes(const attribute_map &attrs = {}) { + for (const auto &kv : attrs) + if (*kv.first.c_str() != '_') + attributes.insert(kv); + } + + // We need CAIRO_ROUND_GLYPH_POS_OFF to be set in font options, + // which can only be done internally, se we have to pass a context that + // is based on an actual PDF surface. + // + // Font maps also need some kind of a backend, like Cairo. + + /// Compute and return space required for the widget's contents. + virtual tuple<double, double> prepare([[maybe_unused]] PangoContext *pc) { + return {0, 0}; + } + + /// Compute and return space required for the widget's contents, + /// given a fixed size (favouring any dimension). + virtual tuple<double, double> prepare_for_size(PangoContext *pc, + [[maybe_unused]] double width, [[maybe_unused]] double height) { + return prepare(pc); + } + + /// Render to the context within the designated space, no clipping. + virtual void render([[maybe_unused]] cairo_t *cr, + [[maybe_unused]] double w, [[maybe_unused]] double h) {} +}; + +/// Special container that basically just fucks with the system right now. 
+DefWidget(Frame) { + unique_ptr<Widget> child; + Frame(Widget *w) : child(w) {} + + virtual void apply_attributes(const attribute_map &attrs) override { + Widget::apply_attributes(attrs); + child->apply_attributes(attributes); + } + + virtual tuple<double, double> prepare(PangoContext *pc) override { + auto d = child->prepare(pc); + if (auto v = getattr("w_override")) + get<0>(d) = get<double>(*v); + if (auto v = getattr("h_override")) + get<1>(d) = get<double>(*v); + return d; + } + + virtual tuple<double, double> prepare_for_size( + PangoContext *pc, double width, double height) override { + if (auto v = getattr("w_override"); v && get<double>(*v) >= 0) + width = get<double>(*v); + if (auto v = getattr("h_override"); v && get<double>(*v) >= 0) + height = get<double>(*v); + auto d = child->prepare_for_size(pc, width, height); + if (auto v = getattr("w_override")) + get<0>(d) = get<double>(*v); + if (auto v = getattr("h_override")) + get<1>(d) = get<double>(*v); + return d; + } + + virtual void render(cairo_t *cr, double w, double h) override { + cairo_save(cr); + + if (auto v = getattr("color")) { + int rgb = get<double>(*v); + cairo_set_source_rgb(cr, ((rgb >> 16) & 0xFF) / 255., + ((rgb >> 8) & 0xFF) / 255., (rgb & 0xFF) / 255.); + } + + child->render(cr, w, h); + cairo_restore(cr); + } +}; + +#define DefContainer(name) struct name : public Container +DefWidget(Container) { + vector<unique_ptr<Widget>> children; + + inline void add() {} + template <typename... Args> void add(Widget *w, Args && ...args) { + children.push_back(unique_ptr<Widget>(w)); + add(args...); + } + + Container(vector<unique_ptr<Widget>> &&children) + : children(std::move(children)) {} + + virtual void apply_attributes(const attribute_map &attrs) override { + Widget::apply_attributes(attrs); + for (auto &i : children) + i->apply_attributes(attributes); + } +}; + +static void finalize_box(vector<double> &sizes, double available) { + double fixed = 0, stretched = 0; + for (auto s : sizes) { + if (s >= 0) + fixed += s; + else + stretched += s; + } + if (stretched) { + auto factor = max(0., available - fixed) / stretched; + for (auto &s : sizes) + if (s < 0) + s *= factor; + } else { + // TODO(p): One should be able to *opt in* for this. 
+ auto redistribute = max(0., available - fixed) / sizes.size(); + for (auto &s : sizes) + s += redistribute; + } +} + +DefContainer(HBox) { + HBox(vector<unique_ptr<Widget>> children = {}) + : Container(std::move(children)) {} + + vector<double> widths; + virtual tuple<double, double> prepare(PangoContext *pc) override { + double w = 0, h = 0; + widths.resize(children.size()); + for (size_t i = 0; i < children.size(); i++) { + auto d = children[i]->prepare(pc); + if ((widths[i] = get<0>(d)) > 0) + w += widths[i]; + h = max(h, get<1>(d)); + } + return {w, h}; + } + + virtual tuple<double, double> prepare_for_size( + PangoContext *pc, double width, double height) override { + double w = 0, h = 0; + widths.resize(children.size()); + for (size_t i = 0; i < children.size(); i++) { + auto d = children[i]->prepare_for_size(pc, width, height); + if ((widths[i] = get<0>(d)) > 0) + w += widths[i]; + h = max(h, get<1>(d)); + } + return {w, h}; + } + + virtual void render(cairo_t *cr, double w, double h) override { + finalize_box(widths, w); + for (size_t i = 0; i < children.size(); i++) { + cairo_save(cr); + children[i]->render(cr, widths[i], h); + cairo_restore(cr); + cairo_translate(cr, widths[i], 0.); + } + } +}; + +DefContainer(VBox) { + VBox(vector<unique_ptr<Widget>> children = {}) + : Container(std::move(children)) {} + + vector<double> heights; + virtual tuple<double, double> prepare(PangoContext *pc) override { + double w = 0, h = 0; + heights.resize(children.size()); + for (size_t i = 0; i < children.size(); i++) { + auto d = children[i]->prepare(pc); + if ((heights[i] = get<1>(d)) > 0) + h += heights[i]; + w = max(w, get<0>(d)); + } + return {w, h}; + } + + virtual tuple<double, double> prepare_for_size( + PangoContext *pc, double width, double height) override { + double w = 0, h = 0; + heights.resize(children.size()); + for (size_t i = 0; i < children.size(); i++) { + auto d = children[i]->prepare_for_size(pc, width, height); + if ((heights[i] = get<1>(d)) > 0) + h += heights[i]; + w = max(w, get<0>(d)); + } + return {w, h}; + } + + virtual void render(cairo_t *cr, double w, double h) override { + finalize_box(heights, h); + for (size_t i = 0; i < children.size(); i++) { + cairo_save(cr); + children[i]->render(cr, w, heights[i]); + cairo_restore(cr); + cairo_translate(cr, 0., heights[i]); + } + } +}; + +/// Fillers just take up space and don't render anything. 
+DefWidget(Filler) { + double w, h; + Filler(double w = -1, double h = -1) : w(w), h(h) {} + virtual tuple<double, double> prepare(PangoContext *) override { + return {w, h}; + } +}; + +DefWidget(HLine) { + double thickness; + HLine(double thickness = 1) : thickness(thickness) {} + virtual tuple<double, double> prepare(PangoContext *) override { + return {-1, thickness}; + } + virtual void render(cairo_t *cr, double w, double h) override { + cairo_move_to(cr, 0, h / 2); + cairo_line_to(cr, w, h / 2); + cairo_set_line_width(cr, thickness); + cairo_stroke(cr); + } +}; + +DefWidget(VLine) { + double thickness; + VLine(double thickness = 1) : thickness(thickness) {} + virtual tuple<double, double> prepare(PangoContext *) override { + return {thickness, -1}; + } + virtual void render(cairo_t *cr, double w, double h) override { + cairo_move_to(cr, w / 2, 0); + cairo_line_to(cr, w / 2, h); + cairo_set_line_width(cr, thickness); + cairo_stroke(cr); + } +}; + +DefWidget(Text) { + string text; + PangoLayout *layout = nullptr; + double y_offset = 0.; + + Text(string text = "") : text(text) {} + virtual ~Text() override { g_clear_object(&layout); } + + static string escape(const char *s, size_t len) { + auto escapechar = [](char c) -> const char * { + if (c == '<') return "<"; + if (c == '>') return ">"; + if (c == '&') return "&"; + return nullptr; + }; + string escaped; + for (size_t i = 0; i < len; i++) + if (auto entity = escapechar(s[i])) + escaped += entity; + else + escaped += s[i]; + return escaped; + } + + void prepare_layout(PangoContext *pc) { + g_clear_object(&layout); + layout = pango_layout_new(pc); + pango_layout_set_markup(layout, text.c_str(), -1); + pango_layout_set_alignment(layout, PANGO_ALIGN_LEFT); + + auto fd = pango_font_description_new(); + if (auto v = getattr("fontfamily")) + pango_font_description_set_family(fd, get<string>(*v).c_str()); + if (auto v = getattr("fontsize")) + pango_font_description_set_size(fd, get<double>(*v) * PANGO_SCALE); + if (auto v = getattr("fontweight")) + pango_font_description_set_weight(fd, PangoWeight(get<double>(*v))); + + // We need this for the line-height calculation. + auto font_size = + double(pango_font_description_get_size(fd)) / PANGO_SCALE; + if (!font_size) + pango_font_description_set_size(fd, (font_size = 10)); + + // Supposedly this is how this shit works. + // XXX: This will never work if the markup changes the font size. + if (auto v = getattr("lineheight")) { + auto increment = get<double>(*v) - 1; + y_offset = increment * font_size / 2; + pango_layout_set_spacing( + layout, increment * font_size * PANGO_SCALE); + } + + // FIXME: We don't want to override what's in the markup. + pango_layout_set_font_description(layout, fd); + pango_font_description_free(fd); + } + + virtual tuple<double, double> prepare(PangoContext *pc) override { + prepare_layout(pc); + + int w, h; + pango_layout_get_size(layout, &w, &h); + return { + double(w) / PANGO_SCALE, double(h) / PANGO_SCALE + 2 * y_offset}; + } + + virtual tuple<double, double> prepare_for_size( + PangoContext *pc, double width, double) override { + prepare_layout(pc); + + // It's difficult to get vertical text, so wrap horizontally. + pango_layout_set_width(layout, PANGO_SCALE * width); + + int w, h; + pango_layout_get_size(layout, &w, &h); + return { + double(w) / PANGO_SCALE, double(h) / PANGO_SCALE + 2 * y_offset}; + } + + virtual void render(cairo_t *cr, double w, double) + override { + g_return_if_fail(layout); + // Assuming horizontal text, make it span the whole allocation. 
+ pango_layout_set_width(layout, PANGO_SCALE * w); + pango_cairo_update_layout(cr, layout); + cairo_translate(cr, 0, y_offset); + pango_cairo_show_layout(cr, layout); + } +}; + +DefWidget(Link) { + string target_uri; + unique_ptr<Widget> child; + + Link(const string &target_uri, Widget *w) + : target_uri(target_uri), child(w) {} + + virtual void apply_attributes(const attribute_map &attrs) override { + Widget::apply_attributes(attrs); + child->apply_attributes(attributes); + } + + virtual tuple<double, double> prepare(PangoContext *pc) override { + return child->prepare(pc); + } + + virtual void render(cairo_t *cr, double w, double h) override { + cairo_save(cr); + cairo_tag_begin( + cr, CAIRO_TAG_LINK, ("uri='" + target_uri + "'").c_str()); + child->render(cr, w, h); + cairo_tag_end(cr, CAIRO_TAG_LINK); + cairo_restore(cr); + } +}; + +// --- Pictures ---------------------------------------------------------------- + +struct image_info { + double width = 0., height = 0., dpi_x = 72., dpi_y = 72.; +}; + +/// http://libpng.org/pub/png/spec/1.2/PNG-Contents.html +static bool read_png_info(image_info &info, const char *data, size_t length) { + return length >= 24 && !memcmp(data, "\211PNG\r\n\032\n", 8) && + !memcmp(data + 12, "IHDR", 4) && + (info.width = ntohl(*(uint32_t *) (data + 16))) && + (info.height = ntohl(*(uint32_t *) (data + 20))); +} + +DefWidget(Picture) { + double w = 0, h = 0; + double scale_x = 1., scale_y = 1.; + cairo_surface_t *surface = nullptr; + + double postscale_for(double width, double height) { + double w = this->w * scale_x; + double h = this->h * scale_y; + if (w < 0 || h < 0) + return 1; + + double postscale = width / w; + if (h * postscale > height) + postscale = height / h; + return postscale; + } + + virtual tuple<double, double> prepare(PangoContext *) override { + return {w * scale_x, h * scale_y}; + } + + virtual tuple<double, double> prepare_for_size( + PangoContext *pc, double width, double height) override { + auto d = prepare(pc); + auto postscale = postscale_for(width, height); + return {get<0>(d) * postscale, get<1>(d) * postscale}; + } + + virtual void render(cairo_t *cr, double width, double height) override { + if (!surface || width <= 0 || height <= 0) + return; + + // For PDF-A, ISO 19005-3:2012 6.2.8: interpolation is not allowed + // (Cairo sets it on by default). + bool interpolate = true; + + auto pattern = cairo_pattern_create_for_surface(surface); + cairo_pattern_set_filter( + pattern, interpolate ? CAIRO_FILTER_GOOD : CAIRO_FILTER_NEAREST); + + // Maybe we should also center the picture or something... + auto postscale = postscale_for(width, height); + cairo_scale(cr, scale_x * postscale, scale_y * postscale); + cairo_set_source(cr, pattern); + cairo_paint(cr); + + cairo_pattern_destroy(pattern); + } + + static cairo_surface_t *make_surface_png(const string &data) { + using CharRange = pair<const char *, const char *>; + CharRange iterator{&*data.begin(), &*data.end()}; + return cairo_image_surface_create_from_png_stream( + [](void *closure, unsigned char *data, uint len) { + auto i = (CharRange *) closure; + if (i->second - i->first < len) + return CAIRO_STATUS_READ_ERROR; + + memcpy(data, i->first, len); + i->first += len; + return CAIRO_STATUS_SUCCESS; + }, + &iterator); + } + + // Cairo doesn't support PNGs in PDFs by MIME type, + // until then we'll have to parametrize. 
+ static function<cairo_surface_t *()> identify( + const string &picture, image_info &info) { + if (read_png_info(info, picture.data(), picture.length())) + return bind(make_surface_png, picture); + return nullptr; + } + + Picture(const string &filename) { + ifstream t{filename}; + stringstream buffer; + buffer << t.rdbuf(); + string picture = buffer.str(); + + image_info info; + if (auto make_surface = identify(picture, info)) { + surface = make_surface(); + w = info.width; + h = info.height; + scale_x = info.dpi_x / 72.; + scale_y = info.dpi_y / 72.; + } else { + cerr << "warning: unreadable picture: " << filename << endl; + } + } +}; + +// --- QR ---------------------------------------------------------------------- + +DefWidget(QR) { + QRcode *code = nullptr; + double T = 1.; + + QR(string text, double T) : T(T) { + QRinput *data = QRinput_new2( + 0 /* Version, i.e., size, here autoselect */, + QR_ECLEVEL_M /* 15% correction */); + if (!data) + return; + + auto u8 = reinterpret_cast<const unsigned char *>(text.data()); + (void) QRinput_append(data, !QRinput_check(QR_MODE_AN, text.size(), u8) + ? QR_MODE_AN : QR_MODE_8, text.size(), u8); + + code = QRcode_encodeInput(data); + QRinput_free(data); + } + + virtual ~QR() override { + if (code) + QRcode_free(code); + } + + virtual tuple<double, double> prepare(PangoContext *) override { + if (!code) + return {0, 0}; + + return {T * code->width, T * code->width}; + } + + virtual void render(cairo_t *cr, double, double) override { + if (!code) + return; + + auto line = code->data; + for (int y = 0; y < code->width; y++) { + for (int x = 0; x < code->width; x++) { + if (line[x] & 1) + cairo_rectangle(cr, T * x, T * y, T, T); + } + line += code->width; + } + cairo_fill(cr); + } +}; + +// --- Lua Widget -------------------------------------------------------------- + +#define XLUA_WIDGET_METATABLE "widget" + +struct LuaWidget { + // shared_ptr would resolve the reference stealing API design issue. + unique_ptr<Widget> widget; +}; + +static void xlua_widget_check(lua_State *L, LuaWidget *self) { + if (!self->widget) + luaL_error(L, "trying to use a consumed widget reference"); +} + +static attribute xlua_widget_tovalue(lua_State *L, LuaWidget *self, int idx) { + xlua_widget_check(L, self); + if (lua_isnumber(L, idx)) + return lua_tonumber(L, idx); + if (lua_isstring(L, idx)) { + size_t len = 0; + const char *s = lua_tolstring(L, idx, &len); + return string(s, len); + } + luaL_error(L, "expected string or numeric attributes"); + return {}; +} + +static void xlua_widget_set( + lua_State *L, LuaWidget *self, Widget *widget, int idx_attrs) { + self->widget.reset(widget); + if (!idx_attrs) + return; + + lua_pushvalue(L, idx_attrs); + lua_pushnil(L); + while (lua_next(L, -2)) { + if (lua_type(L, -2) == LUA_TSTRING) { + size_t key_len = 0; + const char *key = lua_tolstring(L, -2, &key_len); + widget->setattr( + string(key, key_len), xlua_widget_tovalue(L, self, -1)); + } + lua_pop(L, 1); + } + lua_pop(L, 1); +} + +static int xlua_widget_gc(lua_State *L) { + auto self = (LuaWidget *) luaL_checkudata(L, 1, XLUA_WIDGET_METATABLE); + self->widget.reset(nullptr); + return 0; +} + +static int xlua_widget_index(lua_State *L) { + auto self = (LuaWidget *) luaL_checkudata(L, 1, XLUA_WIDGET_METATABLE); + // In theory, this could also index container children, + // but it does not seem practically useful. 
+ auto key = luaL_checkstring(L, 2); + xlua_widget_check(L, self); + + if (auto it = self->widget->attributes.find(key); + it == self->widget->attributes.end()) + lua_pushnil(L); + else if (auto s = get_if<string>(&it->second)) + lua_pushlstring(L, s->c_str(), s->length()); + else if (auto n = get_if<double>(&it->second)) + lua_pushnumber(L, *n); + return 1; +} + +static int xlua_widget_newindex(lua_State *L) { + auto self = (LuaWidget *) luaL_checkudata(L, 1, XLUA_WIDGET_METATABLE); + auto key = luaL_checkstring(L, 2); + xlua_widget_check(L, self); + + self->widget->attributes[key] = xlua_widget_tovalue(L, self, 3); + return 0; +} + +static luaL_Reg xlua_widget_table[] = { + {"__gc", xlua_widget_gc}, + {"__index", xlua_widget_index}, + {"__newindex", xlua_widget_newindex}, + {} +}; + +// --- Lua Document ------------------------------------------------------------ + +#define XLUA_DOCUMENT_METATABLE "document" + +struct LuaDocument { + cairo_t *cr = nullptr; ///< Cairo + cairo_surface_t *pdf = nullptr; ///< PDF surface + PangoContext *pc = nullptr; ///< Pango context + + double page_width = 0.; ///< Page width in 72 DPI points + double page_height = 0.; ///< Page height in 72 DPI points + double page_margin = 0.; ///< Page margins in 72 DPI points +}; + +static int xlua_document_gc(lua_State *L) { + auto self = (LuaDocument *) luaL_checkudata(L, 1, XLUA_DOCUMENT_METATABLE); + cairo_destroy(self->cr); + g_object_unref(self->pc); + return 0; +} + +static int xlua_document_index(lua_State *L) { + if (auto key = luaL_checkstring(L, 2); *key == '_') + lua_pushnil(L); + else + luaL_getmetafield(L, 1, key); + return 1; +} + +// And probably for links as well. +#if CAIRO_VERSION < CAIRO_VERSION_ENCODE(1, 15, 4) +#error "At least Cairo 1.15.4 is required for setting PDF metadata." +#endif + +static optional<cairo_pdf_metadata_t> metadata_by_name(const char *name) { + if (!strcmp(name, "title")) + return CAIRO_PDF_METADATA_TITLE; + if (!strcmp(name, "author")) + return CAIRO_PDF_METADATA_AUTHOR; + if (!strcmp(name, "subject")) + return CAIRO_PDF_METADATA_SUBJECT; + if (!strcmp(name, "keywords")) + return CAIRO_PDF_METADATA_KEYWORDS; + if (!strcmp(name, "creator")) + return CAIRO_PDF_METADATA_CREATOR; + if (!strcmp(name, "create_date")) + return CAIRO_PDF_METADATA_CREATE_DATE; + if (!strcmp(name, "mod_date")) + return CAIRO_PDF_METADATA_MOD_DATE; + return {}; +} + +static int xlua_document_newindex(lua_State *L) { + auto self = (LuaDocument *) luaL_checkudata(L, 1, XLUA_DOCUMENT_METATABLE); + auto name = luaL_checkstring(L, 2); + auto value = luaL_checkstring(L, 3); + + // These are all read-only in Cairo. 
+ if (auto id = metadata_by_name(name)) + cairo_pdf_surface_set_metadata(self->pdf, id.value(), value); + else + return luaL_error(L, "%s: unknown property"); + return 0; +} + +static int xlua_document_show(lua_State *L) { + auto self = (LuaDocument *) luaL_checkudata(L, 1, XLUA_DOCUMENT_METATABLE); + for (int i = 2; i <= lua_gettop(L); i++) { + auto w = (LuaWidget *) luaL_checkudata(L, i, XLUA_WIDGET_METATABLE); + xlua_widget_check(L, w); + auto widget = w->widget.get(); + widget->apply_attributes(); + + auto inner_width = self->page_width - 2 * self->page_margin; + auto inner_height = self->page_height - 2 * self->page_margin; + widget->prepare_for_size(self->pc, inner_width, inner_height); + + cairo_save(self->cr); + cairo_translate(self->cr, self->page_margin, self->page_margin); + widget->render(self->cr, inner_width, inner_height); + cairo_restore(self->cr); + } + cairo_show_page(self->cr); + return 0; +} + +static luaL_Reg xlua_document_table[] = { + {"__gc", xlua_document_gc}, + {"__index", xlua_document_index}, + {"__newindex", xlua_document_newindex}, + {"show", xlua_document_show}, + {} +}; + +// --- Library ----------------------------------------------------------------- + +// 1 point is 1/72 inch, also applies to PDF surfaces. +static int xlua_cm(lua_State *L) { + lua_pushnumber(L, luaL_checknumber(L, 1) / 2.54 * 72); + return 1; +} + +struct xlua_numpunct : public numpunct<char> { + optional<char> thousands_sep_override; + optional<char> decimal_point_override; + optional<string_type> grouping_override; + + using super = std::numpunct<char>; + + virtual char do_thousands_sep() const override { + return thousands_sep_override.value_or(super::do_thousands_sep()); + } + + virtual char do_decimal_point() const override { + return decimal_point_override.value_or(super::do_decimal_point()); + } + + virtual string_type do_grouping() const override { + return grouping_override.value_or(super::do_grouping()); + } +}; + +static int xlua_ntoa(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + auto np = new xlua_numpunct(); + const char *field = nullptr; + if (lua_getfield(L, 1, (field = "thousands_sep")) != LUA_TNIL) { + size_t len = 0; + auto str = lua_tolstring(L, -1, &len); + if (!str || len != 1) + return luaL_error(L, "invalid %s", field); + np->thousands_sep_override.emplace(str[0]); + } + if (lua_getfield(L, 1, (field = "decimal_point")) != LUA_TNIL) { + size_t len = 0; + auto str = lua_tolstring(L, -1, &len); + if (!str || len != 1) + return luaL_error(L, "invalid %s", field); + np->decimal_point_override.emplace(str[0]); + } + if (lua_getfield(L, 1, (field = "grouping")) != LUA_TNIL) { + size_t len = 0; + auto str = lua_tolstring(L, -1, &len); + if (!str) + return luaL_error(L, "invalid %s", field); + np->grouping_override.emplace(string(str, len)); + } + + ostringstream formatted; + formatted.imbue(locale(locale(), np)); + if (lua_getfield(L, 1, "precision") != LUA_TNIL) { + formatted.setf(formatted.fixed, formatted.floatfield); + formatted.precision(lua_tointeger(L, -1)); + } + + lua_geti(L, 1, 1); + if (lua_isinteger(L, -1)) + formatted << lua_tointeger(L, -1); + else if (lua_isnumber(L, -1)) + formatted << lua_tonumber(L, -1); + else + return luaL_error(L, "number expected as the first field"); + + lua_pushstring(L, formatted.str().c_str()); + return 1; +} + +static int xlua_escape(lua_State *L) { + string escaped; + for (int i = 1; i <= lua_gettop(L); i++) { + size_t len = 0; + const char *s = luaL_checklstring(L, i, &len); + escaped.append(Text::escape(s, len)); + } 
+ lua_pushlstring(L, escaped.data(), escaped.length()); + return 1; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static int xlua_document(lua_State *L) { + const char *filename = luaL_checkstring(L, 1); + lua_Number width = luaL_checknumber(L, 2); + lua_Number height = luaL_checknumber(L, 3); + + LuaDocument *self = + static_cast<LuaDocument *>(lua_newuserdata(L, sizeof *self)); + luaL_setmetatable(L, XLUA_DOCUMENT_METATABLE); + new(self) LuaDocument; + + self->pdf = cairo_pdf_surface_create(filename, + (self->page_width = width), (self->page_height = height)); + self->cr = cairo_create(self->pdf); + cairo_surface_destroy(self->pdf); + + self->page_margin = luaL_optnumber(L, 4, self->page_margin); + + auto pc = self->pc = pango_cairo_create_context(self->cr); + // By default the resolution is set to 96 DPI but the PDF surface uses 72. + pango_cairo_context_set_resolution(pc, 72.); + +#if PANGO_VERSION_CHECK(1, 44, 0) + // Otherwise kerning was broken in Pango before 1.48.6. + // Seems like this issue: https://gitlab.gnome.org/GNOME/pango/-/issues/562 + // and might be related to: https://blogs.gnome.org/mclasen/2019/08/ + pango_context_set_round_glyph_positions(pc, FALSE); +#endif + return 1; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static LuaWidget *xlua_newwidget(lua_State *L) { + LuaWidget *self = + static_cast<LuaWidget *>(lua_newuserdata(L, sizeof *self)); + luaL_setmetatable(L, XLUA_WIDGET_METATABLE); + new(self) LuaWidget; + return self; +} + +static int xlua_filler(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + double width = -1, height = -1; + if (lua_geti(L, 1, 1); !lua_isnoneornil(L, -1)) + width = lua_tonumber(L, -1); + if (lua_geti(L, 1, 2); !lua_isnoneornil(L, -1)) + height = lua_tonumber(L, -1); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new Filler{width, height}, 1); + return 1; +} + +static int xlua_hline(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + double thickness = 1; + if (lua_geti(L, 1, 1); !lua_isnoneornil(L, -1)) + thickness = lua_tonumber(L, -1); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new HLine{thickness}, 1); + return 1; +} + +static int xlua_vline(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + double thickness = 1; + if (lua_geti(L, 1, 1); !lua_isnoneornil(L, -1)) + thickness = lua_tonumber(L, -1); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new VLine{thickness}, 1); + return 1; +} + +static string xlua_tostring(lua_State *L, int idx) { + // Automatic conversions are unlikely to be valid XML. + bool escape = !lua_isstring(L, idx); + + size_t length = 0; + const char *s = luaL_tolstring(L, idx, &length); + string text = escape ? 
Text::escape(s, length) : string(s, length); + lua_pop(L, 1); + return text; +} + +static int xlua_text(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + string text; + for (lua_Integer i = 1, len = luaL_len(L, 1); i <= len; i++) { + lua_geti(L, 1, i); + text.append(xlua_tostring(L, -1)); + lua_pop(L, 1); + } + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new Text{text}, 1); + return 1; +} + +static LuaWidget *xlua_towidget(lua_State *L) { + if (luaL_testudata(L, -1, XLUA_WIDGET_METATABLE)) + return (LuaWidget *) luaL_checkudata(L, -1, XLUA_WIDGET_METATABLE); + + string text = xlua_tostring(L, -1); + lua_pop(L, 1); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new Text{text}, 0); + return self; +} + +static int xlua_frame(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + if (luaL_len(L, 1) != 1) + return luaL_error(L, "expected one child widget"); + + lua_geti(L, 1, 1); + auto child = xlua_towidget(L); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new Frame{child->widget.release()}, 1); + return 1; +} + +static int xlua_link(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + if (luaL_len(L, 1) != 2) + return luaL_error(L, "expected link target and one child widget"); + + lua_geti(L, 1, 1); + size_t length = 0; + const char *s = luaL_tolstring(L, -1, &length); + string target(s, length); + lua_pop(L, 1); + + lua_geti(L, 1, 2); + auto child = xlua_towidget(L); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new Link{target, child->widget.release()}, 1); + return 1; +} + +static int xlua_hbox(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + vector<unique_ptr<Widget>> children; + for (lua_Integer i = 1, len = luaL_len(L, 1); i <= len; i++) { + lua_geti(L, 1, i); + children.emplace_back(xlua_towidget(L)->widget.release()); + lua_pop(L, 1); + } + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new HBox{std::move(children)}, 1); + return 1; +} + +static int xlua_vbox(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + + vector<unique_ptr<Widget>> children; + for (lua_Integer i = 1, len = luaL_len(L, 1); i <= len; i++) { + lua_geti(L, 1, i); + children.emplace_back(xlua_towidget(L)->widget.release()); + lua_pop(L, 1); + } + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new VBox{std::move(children)}, 1); + return 1; +} + +static int xlua_picture(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + if (luaL_len(L, 1) != 1) + return luaL_error(L, "expected picture path"); + + lua_geti(L, 1, 1); + size_t length = 0; + const char *s = luaL_tolstring(L, -1, &length); + string filename(s, length); + lua_pop(L, 1); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new Picture{filename}, 1); + return 1; +} + +static int xlua_qr(lua_State *L) { + luaL_checktype(L, 1, LUA_TTABLE); + if (luaL_len(L, 1) != 2) + return luaL_error(L, "expected contents and module size"); + + lua_geti(L, 1, 1); + size_t length = 0; + const char *s = luaL_tolstring(L, -1, &length); + string target(s, length); + lua_pop(L, 1); + + lua_geti(L, 1, 2); + auto T = lua_tonumber(L, -1); + lua_pop(L, 1); + + auto self = xlua_newwidget(L); + xlua_widget_set(L, self, new QR{target, T}, 1); + return 1; +} + +static luaL_Reg xlua_library[] = { + {"cm", xlua_cm}, + {"ntoa", xlua_ntoa}, + {"escape", xlua_escape}, + + {"Document", xlua_document}, + + {"Filler", xlua_filler}, + {"HLine", xlua_hline}, + {"VLine", xlua_vline}, + {"Text", xlua_text}, + {"Frame", xlua_frame}, + {"Link", xlua_link}, + {"HBox", xlua_hbox}, + 
{"VBox", xlua_vbox}, + {"Picture", xlua_picture}, + {"QR", xlua_qr}, + {} +}; + +// --- Initialisation, event handling ------------------------------------------ + +static int xlua_error_handler(lua_State *L) { + // Don't add tracebacks when there's already one, and pass nil through. + const char *string = luaL_optstring(L, 1, NULL); + if (string && !strchr(string, '\n')) { + luaL_traceback(L, L, string, 1); + lua_remove(L, 1); + } + return 1; +} + +static void *xlua_alloc(void *, void *ptr, size_t, size_t n_size) { + if (n_size) + return realloc(ptr, n_size); + + free(ptr); + return NULL; +} + +static int xlua_panic(lua_State *L) { + cerr << "fatal: Lua panicked: " << lua_tostring(L, -1) << endl; + lua_close(L); + exit(EXIT_FAILURE); + return 0; +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + cerr << "Usage: " << argv[0] << " program.lua [args...]" << endl; + return 1; + } + + lua_State *L = lua_newstate(xlua_alloc, NULL); + if (!L) { + cerr << "fatal: Lua initialization failed" << endl; + return 1; + } + lua_atpanic(L, xlua_panic); + luaL_openlibs(L); + luaL_checkversion(L); + + luaL_newlib(L, xlua_library); + lua_setglobal(L, "lpg"); + + luaL_newmetatable(L, XLUA_DOCUMENT_METATABLE); + luaL_setfuncs(L, xlua_document_table, 0); + lua_pop(L, 1); + + luaL_newmetatable(L, XLUA_WIDGET_METATABLE); + luaL_setfuncs(L, xlua_widget_table, 0); + lua_pop(L, 1); + + luaL_checkstack(L, argc, NULL); + + // Joining the first two might make a tiny bit more sense. + lua_createtable(L, argc - 1, 0); + lua_pushstring(L, (string(argv[0]) + " " + argv[1]).c_str()); + lua_rawseti(L, 1, 1); + for (int i = 2; i < argc; i++) { + lua_pushstring(L, argv[i]); + lua_rawseti(L, 1, i - 1); + } + lua_setglobal(L, "arg"); + + int status = 0; + lua_pushcfunction(L, xlua_error_handler); + if ((status = luaL_loadfile(L, strcmp(argv[1], "-") ? argv[1] : NULL))) + goto error; + for (int i = 2; i < argc; i++) + lua_pushstring(L, argv[i]); + if ((status = lua_pcall(L, argc - 2, 0, 1))) + goto error; + lua_close(L); + return 0; + +error: + // Lua will unfortunately discard exceptions that it hasn't thrown itself. + if (const char *err = lua_tostring(L, -1)) + cerr << "error: " << err << endl; + else + cerr << "error: " << status << endl; + lua_close(L); + return 1; +} diff --git a/lpg/lpg.lua b/lpg/lpg.lua new file mode 100644 index 0000000..caf7b3e --- /dev/null +++ b/lpg/lpg.lua @@ -0,0 +1,240 @@ +#!/usr/bin/env lpg +local project_url = "https://git.janouch.name/p/pdf-simple-sign" + +function h1 (title) + return lpg.VBox {fontsize=18., fontweight=600, + title, lpg.HLine {2}, lpg.Filler {-1, 6}} +end +function h2 (title) + return lpg.VBox {fontsize=16., fontweight=600, + lpg.Filler {-1, 8}, title, lpg.HLine {1}, lpg.Filler {-1, 6}} +end +function h3 (title) + return lpg.VBox {fontsize=14., fontweight=600, + lpg.Filler {-1, 8}, title, lpg.HLine {.25}, lpg.Filler {-1, 6}} +end +function p (...) + return lpg.VBox {..., lpg.Filler {-1, 6}} +end +function code (...) + return lpg.VBox { + lpg.Filler {-1, 4}, + lpg.HBox { + lpg.Filler {12}, + lpg.VBox {"<tt>" .. table.concat {...} .. "</tt>"}, + lpg.Filler {}, + }, + lpg.Filler {-1, 6}, + } +end +function define (name, ...) 
+ return lpg.VBox { + lpg.Filler {-1, 2}, + lpg.Text {fontweight=600, name}, lpg.Filler {-1, 2}, + lpg.HBox {lpg.Filler {12}, lpg.VBox {...}, lpg.Filler {}}, + lpg.Filler {-1, 2}, + } +end +function pad (widget) + return lpg.VBox { + lpg.Filler {-1, 2}, + lpg.HBox {lpg.Filler {4}, widget, lpg.Filler {}, lpg.Filler {4}}, + lpg.Filler {-1, 2}, + } +end + +local page1 = lpg.VBox {fontfamily="sans serif", fontsize=12., + h1("lpg User Manual"), + p("<b>lpg</b> is a Lua-based PDF document generator, exposing a trivial " .. + "layouting engine on top of the Cairo graphics library, " .. + "with manual paging."), + p("The author has primarily been using this system to typeset invoices."), + + h2("Synopsis"), + p("<b>lpg</b> <i>program.lua</i> [<i>args...</i>]"), + + h2("API"), + p("The Lua program receives <b>lpg</b>'s and its own path joined " .. + "as <tt>arg[0]</tt>. Any remaining sequential elements " .. + "of this table represent the passed <i>args</i>."), + + h3("Utilities"), + + define("lpg.cm (centimeters)", + p("Returns how many document points are needed " .. + "for the given physical length.")), + + define("lpg.ntoa {number [, precision=…]\n" .. + "\t[, thousands_sep=…] [, decimal_point=…] [, grouping=…]}", + p("Formats a number using the C++ localization " .. + "and I/O libraries. " .. + "For example, the following call results in “3 141,59”:"), + code("ntoa {3141.592, precision=2,\n" .. + " thousands_sep=\" \", decimal_point=\",\", " .. + "grouping=\"\\003\"}")), + + define("lpg.escape (values...)", + p("Interprets all values as strings, " .. + "and escapes them to be used as literal text—" .. + "all text within <b>lpg</b> is parsed as Pango markup, " .. + "which is a subset of XML.")), + + h3("PDF documents"), + + define("lpg.Document (filename, width, height [, margin])", + p("Returns a new <i>Document</i> object, whose pages are all " .. + "the same size in 72 DPI points, as specified by <b>width</b> " .. + "and <b>height</b>. The <b>margin</b> is used by <b>show</b> " .. + "on all sides of pages."), + p("The file is finalized when the object is garbage collected.")), + + define("<i>Document</i>.title, author, subject, keywords, " .. + "creator, create_date, mod_date", + p("Write-only PDF <i>Info</i> dictionary metadata strings.")), + + define("<i>Document</i>:show ([widget...])", + p("Starts a new document page, and renders <i>Widget</i> trees over " .. + "the whole print area.")), + + lpg.Filler {}, +} + +local page2 = lpg.VBox {fontfamily="sans serif", fontsize=12., + h3("Widgets"), + p("The layouting system makes heavy use of composition, " .. + "and thus stays simple."), + p("For convenience, anywhere a <i>Widget</i> is expected but another " .. + "kind of value is received, <b>lpg.Text</b> widget will be invoked " .. + "on that value."), + p("Once a <i>Widget</i> is included in another <i>Widget</i>, " .. + "the original Lua object can no longer be used, " .. + "as its reference has been consumed."), + p("<i>Widgets</i> can be indexed by strings to get or set " .. + "their <i>attributes</i>. All <i>Widget</i> constructor tables " .. + "also accept attributes, for convenience. Attributes can be " .. + "either strings or numbers, mostly only act " .. + "on specific <i>Widget</i> kinds, and are hereditary. " .. + "Prefix their names with an underscore to set them ‘privately’."), + p("<i>Widget</i> sizes can be set negative, which signals to their " .. + "container that they should take any remaining space, " .. + "after all their siblings’ requests have been satisfied. " .. 
+ "When multiple widgets make this request, that space is distributed " .. + "in proportion to these negative values."), + + define("lpg.Filler {[width] [, height]}", + p("Returns a new blank widget with the given dimensions, " .. + "which default to -1, -1.")), + define("lpg.HLine {[thickness]}", + p("Returns a new widget that draws a simple horizontal line " .. + "of the given <b>thickness</b>.")), + define("lpg.VLine {[thickness]}", + p("Returns a new widget that draws a simple vertical line " .. + "of the given <b>thickness</b>.")), + define("lpg.Text {[value...]}", + p("Returns a new text widget that renders the concatenation of all " .. + "passed values filtered through Lua’s <b>tostring</b> " .. + "function. Non-strings will additionally be escaped."), + define("<i>Text</i>.fontfamily, fontsize, fontweight, lineheight", + p("Various font properties, similar to their CSS counterparts."))), + define("lpg.Frame {widget}", + p("Returns a special container widget that can override " .. + "a few interesting properties."), + define("<i>Frame</i>.color", + p("Text and line colour, for example <tt>0xff0000</tt> for red.")), + define("<i>Frame</i>.w_override", + p("Forcefully changes the child <i>Widget</i>’s " .. + "requested width, such as to negative values.")), + define("<i>Frame</i>.h_override", + p("Forcefully changes the child <i>Widget</i>’s " .. + "requested height, such as to negative values."))), + + lpg.Filler {}, +} + +local page3 = lpg.VBox {fontfamily="sans serif", fontsize=12., + define("lpg.Link {target, widget}", + p("Returns a new hyperlink widget pointing to the <b>target</b>, " .. + "which is a URL. The hyperlink applies " .. + "to the entire area of the child widget. " .. + "It has no special appearance.")), + define("lpg.HBox {[widget...]}", + p("Returns a new container widget that places children " .. + "horizontally, from left to right."), + p("If any space remains after satisfying the children widgets’ " .. + "requisitions, it is distributed equally amongst all of them. " .. + "Also see the note about negative sizes.")), + define("lpg.VBox {[widget...]}", + p("Returns a new container widget that places children " .. + "vertically, from top to bottom.")), + define("lpg.Picture {filename}", + p("Returns a new picture widget, showing the given <b>filename</b>, " .. + "which currently must be in the PNG format. " .. + "Pictures are rescaled to fit, but keep their aspect ratio.")), + define("lpg.QR {contents, module}", + p("Returns a new QR code widget, encoding the <b>contents</b> " .. + "string using the given <b>module</b> size. " .. + "The QR code version is chosen automatically.")), + + h2("Examples"), + p("See the source code of this user manual " .. + "for the general structure of scripts."), + + h3("Size distribution and composition"), + lpg.VBox { + lpg.HLine {}, + lpg.HBox { + lpg.VLine {}, lpg.Frame {_w_override=lpg.cm(3), pad "3cm"}, + lpg.VLine {}, lpg.Frame {pad "Measured"}, + lpg.VLine {}, lpg.Frame {_w_override=-1, pad "-1"}, + lpg.VLine {}, lpg.Frame {_w_override=-2, pad "-2"}, + lpg.VLine {}, + }, + lpg.HLine {}, + }, + lpg.Filler {-1, 6}, + code([[ +<small><b>function</b> pad (widget) + <b>local function</b> f (...) 
<b>return</b> lpg.Filler {...} <b>end</b> + <b>return</b> lpg.VBox {f(-1, 2), lpg.HBox {f(4), w, f(), f(4)}, f(-1, 2)} +<b>end</b> + +lpg.VBox {lpg.HLine {}, lpg.HBox { + lpg.VLine {}, lpg.Frame {_w_override=lpg.cm(3), pad "3cm"}, + lpg.VLine {}, lpg.Frame {pad "Measured"}, + lpg.VLine {}, lpg.Frame {_w_override=-1, pad "-1"}, + lpg.VLine {}, lpg.Frame {_w_override=-2, pad "-2"}, + lpg.VLine {}, +}, lpg.HLine {}}</small>]]), + + h3("Clickable QR code link"), + lpg.HBox { + lpg.VBox { + p("Go here to report bugs, request features, " .. + "or submit pull requests:"), + code(([[ +url = "%s" +lpg.Link {url, lpg.QR {url, 2.5}}]]):format(project_url)), + }, + lpg.Filler {}, + lpg.Link {project_url, lpg.QR {project_url, 2.5}}, + }, + + lpg.Filler {}, +} + +if #arg < 1 then + io.stderr:write("Usage: " .. arg[0] .. " OUTPUT-PDF..." .. "\n") + os.exit(false) +end +local width, height, margin = lpg.cm(21), lpg.cm(29.7), lpg.cm(2.0) +for i = 1, #arg do + local pdf = lpg.Document(arg[i], width, height, margin) + pdf.title = "lpg User Manual" + pdf.subject = "lpg User Manual" + pdf.author = "Přemysl Eric Janouch" + pdf.creator = ("lpg (%s)"):format(project_url) + + pdf:show(page1) + pdf:show(page2) + pdf:show(page3) +end diff --git a/lpg/meson.build b/lpg/meson.build new file mode 100644 index 0000000..3ce57ea --- /dev/null +++ b/lpg/meson.build @@ -0,0 +1,24 @@ +project('lpg', 'cpp', default_options : ['cpp_std=c++17'], + version : '1.1.1') + +conf = configuration_data() +conf.set_quoted('PROJECT_NAME', meson.project_name()) +conf.set_quoted('PROJECT_VERSION', meson.project_version()) +configure_file(output : 'config.h', configuration : conf) + +luapp = dependency('lua++', allow_fallback : true) +cairo = dependency('cairo') +pangocairo = dependency('pangocairo') +libqrencode = dependency('libqrencode') +lpg_exe = executable('lpg', 'lpg.cpp', + install : true, + dependencies : [luapp, cairo, pangocairo, libqrencode]) + +# XXX: https://github.com/mesonbuild/meson/issues/825 +docdir = get_option('datadir') / 'doc' / meson.project_name() +lpg_pdf = custom_target('lpg.pdf', + output : 'lpg.pdf', + input : 'lpg.lua', + command : [lpg_exe, '@INPUT@', '@OUTPUT@'], + install_dir : docdir, + build_by_default : true) diff --git a/lpg/subprojects/lua++.wrap b/lpg/subprojects/lua++.wrap new file mode 100644 index 0000000..1ddf5d6 --- /dev/null +++ b/lpg/subprojects/lua++.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = lua-5.4.7 +source_url = https://github.com/lua/lua/archive/refs/tags/v5.4.7.tar.gz +source_filename = lua-5.4.7.tar.gz +source_hash = 5c39111b3fc4c1c9e56671008955a1730f54a15b95e1f1bd0752b868b929d8e3 +patch_directory = lua-5.4.7 + +[provide] +lua++-5.4 = lua_dep +lua++ = lua_dep diff --git a/lpg/subprojects/packagefiles/lua-5.4.7/LICENSE.build b/lpg/subprojects/packagefiles/lua-5.4.7/LICENSE.build new file mode 100644 index 0000000..c62f655 --- /dev/null +++ b/lpg/subprojects/packagefiles/lua-5.4.7/LICENSE.build @@ -0,0 +1,20 @@ +Copyright (c) 2025 Přemysl Eric Janouch <p@janouch.name> +Copyright (c) 2021 The Meson development team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this 
permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lpg/subprojects/packagefiles/lua-5.4.7/meson.build b/lpg/subprojects/packagefiles/lua-5.4.7/meson.build new file mode 100644 index 0000000..dbc8ff6 --- /dev/null +++ b/lpg/subprojects/packagefiles/lua-5.4.7/meson.build @@ -0,0 +1,50 @@ +project( + 'lua-5.4', + 'cpp', + license : 'MIT', + meson_version : '>=0.49.2', + version : '5.4.7', + default_options : ['c_std=c99', 'warning_level=2'], +) + +cxx = meson.get_compiler('cpp') + +# Skip bogus warning. +add_project_arguments(cxx.get_supported_arguments( + '-Wno-string-plus-int', '-Wno-stringop-overflow'), language : 'cpp') + +# Platform-specific defines. +is_posix = host_machine.system() in ['cygwin', 'darwin', 'dragonfly', 'freebsd', + 'gnu', 'haiku', 'linux', 'netbsd', 'openbsd', 'sunos'] +if is_posix + add_project_arguments('-DLUA_USE_POSIX', language : 'cpp') +endif + +# Library dependencies. +lua_lib_deps = [cxx.find_library('m', required : false)] + +if meson.version().version_compare('>= 0.62') + dl_dep = dependency('dl', required : get_option('loadlib')) +else + dl_dep = cxx.find_library('dl', required : get_option('loadlib')) +endif + +if dl_dep.found() + lua_lib_deps += dl_dep + add_project_arguments('-DLUA_USE_DLOPEN', language : 'cpp') +endif + +# Targets. 
+add_project_arguments('-DMAKE_LIB', language : 'cpp') +lua_lib = static_library( + 'lua', + 'onelua.cpp', + dependencies : lua_lib_deps, + implicit_include_directories : false, +) + +inc = include_directories('.') +lua_dep = declare_dependency( + link_with : lua_lib, + include_directories : inc, +) diff --git a/lpg/subprojects/packagefiles/lua-5.4.7/meson_options.txt b/lpg/subprojects/packagefiles/lua-5.4.7/meson_options.txt new file mode 100644 index 0000000..ea6f6c4 --- /dev/null +++ b/lpg/subprojects/packagefiles/lua-5.4.7/meson_options.txt @@ -0,0 +1,4 @@ +option( + 'loadlib', type : 'feature', + description : 'Allow Lua to "require" C extension modules' +) diff --git a/lpg/subprojects/packagefiles/lua-5.4.7/onelua.cpp b/lpg/subprojects/packagefiles/lua-5.4.7/onelua.cpp new file mode 100644 index 0000000..6517028 --- /dev/null +++ b/lpg/subprojects/packagefiles/lua-5.4.7/onelua.cpp @@ -0,0 +1 @@ +#include "onelua.c" diff --git a/meson.build b/meson.build index b9d18ad..668b612 100644 --- a/meson.build +++ b/meson.build @@ -2,8 +2,8 @@ project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'], version : '1.1.1') conf = configuration_data() -conf.set('PROJECT_NAME', '"' + meson.project_name() + '"') -conf.set('PROJECT_VERSION', '"' + meson.project_version() + '"') +conf.set_quoted('PROJECT_NAME', meson.project_name()) +conf.set_quoted('PROJECT_VERSION', meson.project_version()) configure_file(output : 'config.h', configuration : conf) cryptodep = dependency('libcrypto') @@ -14,10 +14,10 @@ executable('pdf-simple-sign', 'pdf-simple-sign.cpp', asciidoctor = find_program('asciidoctor') foreach page : ['pdf-simple-sign'] custom_target('manpage for ' + page, - input: page + '.adoc', output: page + '.1', - command: [asciidoctor, '-b', 'manpage', + input : page + '.adoc', output: page + '.1', + command : [asciidoctor, '-b', 'manpage', '-a', 'release-version=' + meson.project_version(), '@INPUT@', '-o', '@OUTPUT@'], - install: true, - install_dir: join_paths(get_option('mandir'), 'man1')) + install : true, + install_dir : join_paths(get_option('mandir'), 'man1')) endforeach diff --git a/pdf-simple-sign.adoc b/pdf-simple-sign.adoc index 491fa64..4ab1bc5 100644 --- a/pdf-simple-sign.adoc +++ b/pdf-simple-sign.adoc @@ -14,7 +14,7 @@ Synopsis Description ----------- -'pdf-simple-sign' is a simple PDF signer intended for documents produced by +*pdf-simple-sign* is a simple PDF signer intended for documents produced by the Cairo library, GNU troff, ImageMagick, or similar. As such, it currently comes with some restrictions: diff --git a/pdf-simple-sign.cpp b/pdf-simple-sign.cpp index 6bbb284..8b9d1fe 100644 --- a/pdf-simple-sign.cpp +++ b/pdf-simple-sign.cpp @@ -16,26 +16,26 @@ // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. // -#include <cstdio> #include <cmath> +#include <cstdio> #undef NDEBUG #include <cassert> -#include <vector> #include <map> -#include <regex> #include <memory> +#include <regex> #include <set> +#include <vector> #if defined __GLIBCXX__ && __GLIBCXX__ < 20140422 #error Need libstdc++ >= 4.9 for <regex> #endif -#include <unistd.h> #include <getopt.h> #include <openssl/err.h> -#include <openssl/x509v3.h> #include <openssl/pkcs12.h> +#include <openssl/x509v3.h> +#include <unistd.h> #include "config.h" @@ -55,7 +55,7 @@ static std::string concatenate(const std::vector<std::string>& v, const std::str template<typename... Args> std::string ssprintf(const std::string& format, Args... args) { - size_t size = std::snprintf(nullptr, 0, format.c_str(), args... 
) + 1; + size_t size = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; std::unique_ptr<char[]> buf(new char[size]); std::snprintf(buf.get(), size, format.c_str(), args...); return std::string(buf.get(), buf.get() + size - 1); @@ -64,7 +64,7 @@ std::string ssprintf(const std::string& format, Args... args) { // ------------------------------------------------------------------------------------------------- /// PDF token/object thingy. Objects may be composed either from one or a sequence of tokens. -/// The PDF Reference doesn't actually speak of tokens. +/// The PDF Reference doesn't actually speak of tokens, though ISO 32000-1:2008 does. struct pdf_object { enum type { END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING, @@ -262,14 +262,12 @@ static std::string pdf_serialize(const pdf_object& o) { case pdf_object::NL: return "\n"; case pdf_object::NIL: return "null"; case pdf_object::BOOL: return o.number ? "true" : "false"; - case pdf_object::NUMERIC: - { + case pdf_object::NUMERIC: { if (o.is_integer()) return std::to_string((long long) o.number); return std::to_string(o.number); } case pdf_object::KEYWORD: return o.string; - case pdf_object::NAME: - { + case pdf_object::NAME: { std::string escaped = "/"; for (char c : o.string) { if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c)) @@ -279,8 +277,7 @@ static std::string pdf_serialize(const pdf_object& o) { } return escaped; } - case pdf_object::STRING: - { + case pdf_object::STRING: { std::string escaped; for (char c : o.string) { if (c == '\\' || c == '(' || c == ')') @@ -293,15 +290,13 @@ static std::string pdf_serialize(const pdf_object& o) { case pdf_object::E_ARRAY: return "]"; case pdf_object::B_DICT: return "<<"; case pdf_object::E_DICT: return ">>"; - case pdf_object::ARRAY: - { + case pdf_object::ARRAY: { std::vector<std::string> v; for (const auto& i : o.array) v.push_back(pdf_serialize(i)); return "[ " + concatenate(v, " ") + " ]"; } - case pdf_object::DICT: - { + case pdf_object::DICT: { std::string s; for (const auto i : o.dict) // FIXME the key is also supposed to be escaped by pdf_serialize() @@ -372,8 +367,8 @@ pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack auto g = stack.back(); stack.pop_back(); auto n = stack.back(); stack.pop_back(); - if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX - || !n.is_integer() || n.number < 0 || n.number > UINT_MAX) + if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX || + !n.is_integer() || n.number < 0 || n.number > UINT_MAX) return {pdf_object::END, "invalid object ID pair"}; pdf_object obj{pdf_object::OBJECT}; @@ -397,8 +392,8 @@ pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const { auto g = stack.back(); stack.pop_back(); auto n = stack.back(); stack.pop_back(); - if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX - || !n.is_integer() || n.number < 0 || n.number > UINT_MAX) + if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX || + !n.is_integer() || n.number < 0 || n.number > UINT_MAX) return {pdf_object::END, "invalid reference ID pair"}; pdf_object ref{pdf_object::REFERENCE}; @@ -415,8 +410,7 @@ pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) co case pdf_object::COMMENT: // These are not important to parsing, not even for this procedure's needs return parse(lex, stack); - case pdf_object::B_ARRAY: - { + case pdf_object::B_ARRAY: { std::vector<pdf_object> array; while (1) { auto object = parse(lex, array); @@ -428,8 
+422,7 @@ pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) co } return array; } - case pdf_object::B_DICT: - { + case pdf_object::B_DICT: { std::vector<pdf_object> array; while (1) { auto object = parse(lex, array); @@ -477,8 +470,8 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie break; auto second = parse(lex, throwaway_stack); - if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX - || !second.is_integer() || second.number < 0 || second.number > UINT_MAX) + if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX || + !second.is_integer() || second.number < 0 || second.number > UINT_MAX) return "invalid xref section header"; const size_t start = object.number; @@ -487,9 +480,9 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie auto off = parse(lex, throwaway_stack); auto gen = parse(lex, throwaway_stack); auto key = parse(lex, throwaway_stack); - if (!off.is_integer() || off.number < 0 || off.number > document.length() - || !gen.is_integer() || gen.number < 0 || gen.number > 65535 - || key.type != pdf_object::KEYWORD) + if (!off.is_integer() || off.number < 0 || off.number > document.length() || + !gen.is_integer() || gen.number < 0 || gen.number > 65535 || + key.type != pdf_object::KEYWORD) return "invalid xref entry"; bool free = true; @@ -550,8 +543,8 @@ std::string pdf_updater::initialize() { const auto prev_offset = trailer.dict.find("Prev"); if (prev_offset == trailer.dict.end()) break; - // FIXME we don't check for size_t over or underflow - if (!prev_offset->second.is_integer()) + // FIXME do not read offsets and sizes as floating point numbers + if (!prev_offset->second.is_integer() || prev_offset->second.number < 0) return "invalid Prev offset"; xref_offset = prev_offset->second.number; } @@ -657,8 +650,8 @@ void pdf_updater::flush_updates() { } trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)}; - document += "trailer\n" + pdf_serialize(trailer) - + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref); + document += + "trailer\n" + pdf_serialize(trailer) + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref); } // ------------------------------------------------------------------------------------------------- @@ -700,9 +693,9 @@ static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_ge // XXX technically speaking, this may be an indirect reference. 
The correct way to solve this // seems to be having "pdf_updater" include a wrapper around "obj.dict.find" auto kids = obj.dict.find("Kids"); - if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY - || kids->second.array.empty() - || kids->second.array.at(0).type != pdf_object::REFERENCE) + if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY || + kids->second.array.empty() || + kids->second.array.at(0).type != pdf_object::REFERENCE) return {pdf_object::NIL}; // XXX nothing prevents us from recursing in an evil circular graph @@ -740,8 +733,8 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, // OpenSSL error reasons will usually be of more value than any distinction I can come up with std::string err = "OpenSSL failure"; - if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr)) - || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) { + if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr)) || + !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) { err = pkcs12_path + ": parse failure"; goto error; } @@ -766,8 +759,8 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, #endif // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer - if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags)) - || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags)) + if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags)) || + !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags)) goto error; // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A): // PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value) @@ -777,10 +770,10 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making // a copy of the whole document. Hopefully this writes directly into a digest BIO. - if (!(p7bio = PKCS7_dataInit(p7, nullptr)) - || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off) - || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len) - || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio)) + if (!(p7bio = PKCS7_dataInit(p7, nullptr)) || + (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off) || + (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len) || + BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio)) goto error; #if 0 @@ -850,7 +843,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) { // 8.7 Digital Signatures - /signature dictionary/ auto sigdict_n = pdf.allocate(); size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0; - pdf.update(sigdict_n, [&]{ + pdf.update(sigdict_n, [&] { // The timestamp is important for Adobe Acrobat Reader DC. The ideal would be to use RFC 3161. 
pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n" " /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange "); @@ -883,7 +876,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) { }}}); auto sigfield_n = pdf.allocate(); - pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); }); + pdf.update(sigfield_n, [&] { pdf.document += pdf_serialize(sigfield); }); auto pages_ref = root.dict.find("Pages"); if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE) @@ -901,7 +894,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) { annots = {pdf_object::ARRAY}; } annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0); - pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); }); + pdf.update(page.n, [&] { pdf.document += pdf_serialize(page); }); // 8.6.1 Interactive Form Dictionary if (root.dict.count("AcroForm")) @@ -918,7 +911,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) { if (pdf.version(root) < 16) root.dict["Version"] = {pdf_object::NAME, "1.6"}; - pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); }); + pdf.update(root_ref->second.n, [&] { pdf.document += pdf_serialize(root); }); pdf.flush_updates(); // Now that we know the length of everything, store byte ranges of what we're about to sign, @@ -947,9 +940,9 @@ static void die(int status, const char* format, ...) { int main(int argc, char* argv[]) { auto invocation_name = argv[0]; - auto usage = [=]{ + auto usage = [=] { die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS", - invocation_name); + invocation_name); }; static struct option opts[] = { @@ -963,8 +956,7 @@ int main(int argc, char* argv[]) { long reservation = 4096; while (1) { int option_index = 0; - auto c = getopt_long(argc, const_cast<char* const*>(argv), - "hVr:", opts, &option_index); + auto c = getopt_long(argc, const_cast<char* const*>(argv), "hVr:", opts, &option_index); if (c == -1) break; @@ -1,5 +1,5 @@ // -// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name> +// Copyright (c) 2018 - 2024, Přemysl Eric Janouch <p@janouch.name> // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted. @@ -18,6 +18,8 @@ package pdf import ( "bytes" + "compress/zlib" + "encoding/binary" "encoding/hex" "errors" "fmt" @@ -59,20 +61,22 @@ const ( // higher-level objects Array Dict + Stream Indirect Reference ) -// Object is a PDF token/object thingy. Objects may be composed either from +// Object is a PDF token/object thingy. Objects may be composed either from // one or a sequence of tokens. The PDF Reference doesn't actually speak -// of tokens. +// of tokens, though ISO 32000-1:2008 does. 
type Object struct { Kind ObjectKind String string // Comment/Keyword/Name/String Number float64 // Bool, Numeric Array []Object // Array, Indirect - Dict map[string]Object // Dict, in the future also Stream + Dict map[string]Object // Dict, Stream + Stream []byte // Stream N, Generation uint // Indirect, Reference } @@ -118,6 +122,13 @@ func NewDict(d map[string]Object) Object { return Object{Kind: Dict, Dict: d} } +func NewStream(d map[string]Object, s []byte) Object { + if d == nil { + d = make(map[string]Object) + } + return Object{Kind: Stream, Dict: d, Stream: s} +} + func NewIndirect(o Object, n, generation uint) Object { return Object{Kind: Indirect, N: n, Generation: generation, Array: []Object{o}} @@ -458,6 +469,10 @@ func (o *Object) Serialize() string { fmt.Fprint(b, " /", k, " ", v.Serialize()) } return "<<" + b.String() + " >>" + case Stream: + d := NewDict(o.Dict) + d.Dict["Length"] = NewNumeric(float64(len(o.Stream))) + return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream" case Indirect: return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation, o.Array[0].Serialize()) @@ -471,8 +486,9 @@ func (o *Object) Serialize() string { // ----------------------------------------------------------------------------- type ref struct { - offset int64 // file offset or N of the next free entry + offset int64 // file offset, or N of the next free entry, or index generation uint // object generation + compressed *uint // PDF 1.5: N of the containing compressed object nonfree bool // whether this N is taken (for a good zero value) } @@ -497,6 +513,65 @@ type Updater struct { Trailer map[string]Object } +// ListIndirect returns the whole cross-reference table as Reference Objects. +func (u *Updater) ListIndirect() []Object { + result := []Object{} + for i := 0; i < len(u.xref); i++ { + if u.xref[i].nonfree { + result = append(result, NewReference(uint(i), u.xref[i].generation)) + } + } + return result +} + +func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) { + lenStack := len(*stack) + if lenStack < 1 { + return newError("missing stream dictionary") + } + dict := (*stack)[lenStack-1] + if dict.Kind != Dict { + return newError("stream not preceded by a dictionary") + } + + *stack = (*stack)[:lenStack-1] + length, ok := dict.Dict["Length"] + if !ok { + return newError("missing stream Length") + } + length, err := u.Dereference(length) + if err != nil { + return length, err + } + if !length.IsUint() || length.Number > math.MaxInt { + return newError("stream Length not an unsigned integer") + } + + // Expect exactly one newline. + if nl, err := lex.Next(); err != nil { + return nl, err + } else if nl.Kind != NL { + return newError("stream does not start with a newline") + } + + size := int(length.Number) + if len(lex.P) < size { + return newError("stream is longer than the document") + } + + dict.Kind = Stream + dict.Stream = lex.P[:size] + lex.P = lex.P[size:] + + // Skip any number of trailing newlines or comments. 
+ if end, err := u.parse(lex, stack); err != nil { + return end, err + } else if end.Kind != Keyword || end.String != "endstream" { + return newError("improperly terminated stream") + } + return dict, nil +} + func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) { lenStack := len(*stack) if lenStack < 2 { @@ -590,15 +665,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) { } return NewDict(dict), nil case Keyword: - // Appears in the document body, typically needs - // to access the cross-reference table. - // - // TODO(p): Use the xref to read /Length etc. once we - // actually need to read such objects; presumably - // streams can use the Object.String member. switch token.String { case "stream": - return newError("streams are not supported yet") + // Appears in the document body, + // typically needs to access the cross-reference table. + return u.parseStream(lex, stack) case "obj": return u.parseIndirect(lex, stack) case "R": @@ -610,16 +681,159 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) { } } -func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error { +func (u *Updater) loadXrefEntry( + n uint, r ref, loadedEntries map[uint]struct{}) { + if _, ok := loadedEntries[n]; ok { + return + } + if lenXref := uint(len(u.xref)); n >= lenXref { + u.xref = append(u.xref, make([]ref, n-lenXref+1)...) + } + loadedEntries[n] = struct{}{} + + u.xref[n] = r +} + +func (u *Updater) loadXrefStream( + lex *Lexer, stack []Object, loadedEntries map[uint]struct{}) ( + Object, error) { + var object Object + for { + var err error + if object, err = u.parse(lex, &stack); err != nil { + return New(End), fmt.Errorf("invalid xref table: %s", err) + } else if object.Kind == End { + return newError("invalid xref table") + } + + // For the sake of simplicity, keep stacking until we find an object. 
+ if object.Kind == Indirect { + break + } + + stack = append(stack, object) + } + + // ISO 32000-2:2020 7.5.8.2 Cross-reference stream dictionary + stream := object.Array[0] + if stream.Kind != Stream { + return newError("invalid xref table") + } + if typ, ok := stream.Dict["Type"]; !ok || + typ.Kind != Name || typ.String != "XRef" { + return newError("invalid xref stream") + } + + data, err := u.GetStreamData(stream) + if err != nil { + return New(End), fmt.Errorf("invalid xref stream: %s", err) + } + + size, ok := stream.Dict["Size"] + if !ok || !size.IsUint() || size.Number <= 0 { + return newError("invalid or missing cross-reference stream Size") + } + + type pair struct{ start, count uint } + pairs := []pair{} + if index, ok := stream.Dict["Index"]; !ok { + pairs = append(pairs, pair{0, uint(size.Number)}) + } else { + if index.Kind != Array || len(index.Array)%2 != 0 { + return newError("invalid cross-reference stream Index") + } + + a := index.Array + for i := 0; i < len(a); i += 2 { + if !a[i].IsUint() || !a[i+1].IsUint() { + return newError("invalid cross-reference stream Index") + } + pairs = append(pairs, pair{uint(a[i].Number), uint(a[i+1].Number)}) + } + } + + w, ok := stream.Dict["W"] + if !ok || w.Kind != Array || len(w.Array) != 3 || + !w.Array[0].IsUint() || !w.Array[1].IsUint() || !w.Array[2].IsUint() { + return newError("invalid or missing cross-reference stream W") + } + + w1 := uint(w.Array[0].Number) + w2 := uint(w.Array[1].Number) + w3 := uint(w.Array[2].Number) + if w2 == 0 { + return newError("invalid cross-reference stream W") + } + + unit := w1 + w2 + w3 + if uint(len(data))%unit != 0 { + return newError("invalid cross-reference stream length") + } + + readField := func(data []byte, width uint) (uint, []byte) { + var n uint + for ; width != 0; width-- { + n = n<<8 | uint(data[0]) + data = data[1:] + } + return n, data + } + + // ISO 32000-2:2020 7.5.8.3 Cross-reference stream data + for _, pair := range pairs { + for i := uint(0); i < pair.count; i++ { + if uint(len(data)) < unit { + return newError("premature cross-reference stream EOF") + } + + var f1, f2, f3 uint = 1, 0, 0 + if w1 > 0 { + f1, data = readField(data, w1) + } + f2, data = readField(data, w2) + if w3 > 0 { + f3, data = readField(data, w3) + } + + var r ref + switch f1 { + case 0: + r.offset = int64(f2) + r.generation = f3 + case 1: + r.offset = int64(f2) + r.generation = f3 + r.nonfree = true + case 2: + r.offset = int64(f3) + r.compressed = &f2 + r.nonfree = true + default: + // TODO(p): It should be treated as a reference to + // the null object. We can't currently represent that. 
+ return newError("unsupported cross-reference stream contents") + } + + u.loadXrefEntry(pair.start+i, r, loadedEntries) + } + } + + stream.Kind = Dict + stream.Stream = nil + return stream, nil +} + +func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) ( + Object, error) { var throwawayStack []Object - if keyword, _ := u.parse(lex, - &throwawayStack); keyword.Kind != Keyword || keyword.String != "xref" { - return errors.New("invalid xref table") + if object, _ := u.parse(lex, + &throwawayStack); object.Kind != Keyword || object.String != "xref" { + return u.loadXrefStream(lex, []Object{object}, loadedEntries) } for { object, _ := u.parse(lex, &throwawayStack) if object.Kind == End { - return errors.New("unexpected EOF while looking for the trailer") + return newError("unexpected EOF while looking for the trailer") } if object.Kind == Keyword && object.String == "trailer" { break @@ -627,7 +841,7 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error { second, _ := u.parse(lex, &throwawayStack) if !object.IsUint() || !second.IsUint() { - return errors.New("invalid xref section header") + return newError("invalid xref section header") } start, count := uint(object.Number), uint(second.Number) @@ -639,33 +853,29 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error { off.Number > float64(len(u.Document)) || !gen.IsInteger() || gen.Number < 0 || gen.Number > 65535 || key.Kind != Keyword { - return errors.New("invalid xref entry") + return newError("invalid xref entry") } free := true if key.String == "n" { free = false } else if key.String != "f" { - return errors.New("invalid xref entry") - } - - n := start + i - if _, ok := loadedEntries[n]; ok { - continue + return newError("invalid xref entry") } - if lenXref := uint(len(u.xref)); n >= lenXref { - u.xref = append(u.xref, make([]ref, n-lenXref+1)...) - } - loadedEntries[n] = struct{}{} - u.xref[n] = ref{ + u.loadXrefEntry(start+i, ref{ offset: int64(off.Number), generation: uint(gen.Number), nonfree: !free, - } + }, loadedEntries) } } - return nil + + trailer, _ := u.parse(lex, &throwawayStack) + if trailer.Kind != Dict { + return newError("invalid trailer dictionary") + } + return trailer, nil } // ----------------------------------------------------------------------------- @@ -695,7 +905,6 @@ func NewUpdater(document []byte) (*Updater, error) { loadedXrefs := make(map[int64]struct{}) loadedEntries := make(map[uint]struct{}) - var throwawayStack []Object for { if _, ok := loadedXrefs[xrefOffset]; ok { return nil, errors.New("circular xref offsets") @@ -705,24 +914,26 @@ func NewUpdater(document []byte) (*Updater, error) { } lex := Lexer{u.Document[xrefOffset:]} - if err := u.loadXref(&lex, loadedEntries); err != nil { + trailer, err := u.loadXref(&lex, loadedEntries) + if err != nil { return nil, err } - trailer, _ := u.parse(&lex, &throwawayStack) - if trailer.Kind != Dict { - return nil, errors.New("invalid trailer dictionary") - } if len(loadedXrefs) == 0 { u.Trailer = trailer.Dict } loadedXrefs[xrefOffset] = struct{}{} + // TODO(p): Descend into XRefStm here first, if present, + // which is also a linked list. + + // We allow for mixed cross-reference tables and streams + // within a single Prev list, although this should never occur. prevOffset, ok := trailer.Dict["Prev"] if !ok { break } - // FIXME: We don't check for size_t over or underflow. + // FIXME: Do not read offsets and sizes as floating point numbers. 
if !prevOffset.IsInteger() { return nil, errors.New("invalid Prev offset") } @@ -764,18 +975,115 @@ func (u *Updater) Version(root *Object) int { return 0 } +func (u *Updater) getFromObjStm(nObjStm, n uint) (Object, error) { + if nObjStm == n { + return newError("ObjStm recursion") + } + + stream, err := u.Get(nObjStm, 0) + if err != nil { + return stream, err + } + if stream.Kind != Stream { + return newError("invalid ObjStm") + } + if typ, ok := stream.Dict["Type"]; !ok || + typ.Kind != Name || typ.String != "ObjStm" { + return newError("invalid ObjStm") + } + + data, err := u.GetStreamData(stream) + if err != nil { + return New(End), fmt.Errorf("invalid ObjStm: %s", err) + } + entryN, ok := stream.Dict["N"] + if !ok || !entryN.IsUint() || entryN.Number <= 0 { + return newError("invalid ObjStm N") + } + entryFirst, ok := stream.Dict["First"] + if !ok || !entryFirst.IsUint() || entryFirst.Number <= 0 { + return newError("invalid ObjStm First") + } + + // NOTE: This means descending into that stream if n is not found here. + // It is meant to be an object reference. + if extends, ok := stream.Dict["Extends"]; ok && extends.Kind != Nil { + return newError("ObjStm extensions are unsupported") + } + + count := uint(entryN.Number) + first := uint(entryFirst.Number) + if first > uint(len(data)) { + return newError("invalid ObjStm First") + } + + lex1 := Lexer{data[:first]} + data = data[first:] + + type pair struct{ n, offset uint } + pairs := []pair{} + for i := uint(0); i < count; i++ { + var throwawayStack []Object + objN, _ := u.parse(&lex1, &throwawayStack) + objOffset, _ := u.parse(&lex1, &throwawayStack) + if !objN.IsUint() || !objOffset.IsUint() { + return newError("invalid ObjStm pairs") + } + pairs = append(pairs, pair{uint(objN.Number), uint(objOffset.Number)}) + } + for i, pair := range pairs { + if pair.offset > uint(len(data)) || + i > 0 && pairs[i-1].offset >= pair.offset { + return newError("invalid ObjStm pairs") + } + } + + for i, pair := range pairs { + if pair.n != n { + continue + } + + if i+1 < len(pairs) { + data = data[pair.offset:pairs[i+1].offset] + } else { + data = data[pair.offset:] + } + + lex2 := Lexer{data} + var stack []Object + for { + object, err := u.parse(&lex2, &stack) + if err != nil { + return object, err + } else if object.Kind == End { + break + } else { + stack = append(stack, object) + } + } + if len(stack) == 0 { + return newError("empty ObjStm object") + } + return stack[0], nil + } + return newError("object not found in ObjStm") +} + // Get retrieves an object by its number and generation--may return // Nil or End with an error. -// -// TODO(p): We should fix all uses of this not to eat the error. func (u *Updater) Get(n, generation uint) (Object, error) { if n >= u.xrefSize { return New(Nil), nil } ref := u.xref[n] - if !ref.nonfree || ref.generation != generation || - ref.offset >= int64(len(u.Document)) { + if !ref.nonfree || ref.generation != generation { + return New(Nil), nil + } + + if ref.compressed != nil { + return u.getFromObjStm(*ref.compressed, n) + } else if ref.offset >= int64(len(u.Document)) { return New(Nil), nil } @@ -796,6 +1104,14 @@ func (u *Updater) Get(n, generation uint) (Object, error) { } } +// Derefence dereferences Reference objects, and passes the other kinds through. +func (u *Updater) Dereference(o Object) (Object, error) { + if o.Kind != Reference { + return o, nil + } + return u.Get(o.N, o.Generation) +} + // Allocate allocates a new object number. 
func (u *Updater) Allocate() uint { n := u.xrefSize @@ -822,8 +1138,8 @@ type BytesWriter interface { WriteString(s string) (n int, err error) } -// Update appends an updated object to the end of the document. The fill -// callback must write exactly one PDF object. +// Update appends an updated object to the end of the document. +// The fill callback must write exactly one PDF object. func (u *Updater) Update(n uint, fill func(buf BytesWriter)) { oldRef := u.xref[n] u.updated[n] = struct{}{} @@ -843,20 +1159,62 @@ func (u *Updater) Update(n uint, fill func(buf BytesWriter)) { u.Document = buf.Bytes() } -// FlushUpdates writes an updated cross-reference table and trailer. -func (u *Updater) FlushUpdates() { - updated := make([]uint, 0, len(u.updated)) - for n := range u.updated { - updated = append(updated, n) +func (u *Updater) flushXRefStm(updated []uint, buf *bytes.Buffer) { + // The cross-reference stream has to point to itself. + // XXX: We only duplicate Update code here due to how we currently buffer. + n := u.Allocate() + updated = append(updated, n) + + u.updated[n] = struct{}{} + u.xref[n] = ref{ + offset: int64(buf.Len() + 1), + generation: 0, + nonfree: true, } - sort.Slice(updated, func(i, j int) bool { - return updated[i] < updated[j] + + index, b := []Object{}, []byte{} + write := func(f1 byte, f2, f3 uint64) { + b = append(b, f1) + b = binary.BigEndian.AppendUint64(b, f2) + b = binary.BigEndian.AppendUint64(b, f3) + } + for i := 0; i < len(updated); { + start, stop := updated[i], updated[i]+1 + for i++; i < len(updated) && updated[i] == stop; i++ { + stop++ + } + + index = append(index, + NewNumeric(float64(start)), NewNumeric(float64(stop-start))) + for ; start < stop; start++ { + ref := u.xref[start] + if ref.compressed != nil { + write(2, uint64(*ref.compressed), uint64(ref.offset)) + } else if ref.nonfree { + write(1, uint64(ref.offset), uint64(ref.generation)) + } else { + write(0, uint64(ref.offset), uint64(ref.generation)) + } + } + } + + u.Trailer["Size"] = NewNumeric(float64(u.xrefSize)) + u.Trailer["Index"] = NewArray(index) + u.Trailer["W"] = NewArray([]Object{ + NewNumeric(1), NewNumeric(8), NewNumeric(8), }) - buf := bytes.NewBuffer(u.Document) - startXref := buf.Len() + 1 - buf.WriteString("\nxref\n") + for _, key := range []string{ + "Filter", "DecodeParms", "F", "FFilter", "FDecodeParms", "DL"} { + delete(u.Trailer, key) + } + stream := NewStream(u.Trailer, b) + fmt.Fprintf(buf, "\n%d 0 obj\n%s\nendobj", n, stream.Serialize()) +} + +func (u *Updater) flushXRefTable(updated []uint, buf *bytes.Buffer) { + buf.WriteString("\nxref\n") for i := 0; i < len(updated); { start, stop := updated[i], updated[i]+1 for i++; i < len(updated) && updated[i] == stop; i++ { @@ -865,8 +1223,9 @@ func (u *Updater) FlushUpdates() { fmt.Fprintf(buf, "%d %d\n", start, stop-start) for ; start < stop; start++ { + // XXX: We should warn about any object streams here. ref := u.xref[start] - if ref.nonfree { + if ref.nonfree && ref.compressed == nil { fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation) } else { fmt.Fprintf(buf, "%010d %05d f \n", ref.offset, ref.generation) @@ -883,10 +1242,38 @@ func (u *Updater) FlushUpdates() { u.Trailer["Size"] = NewNumeric(float64(u.xrefSize)) trailer := NewDict(u.Trailer) + fmt.Fprintf(buf, "trailer\n%s", trailer.Serialize()) +} + +// FlushUpdates writes an updated cross-reference table and trailer, or stream. 
+func (u *Updater) FlushUpdates() { + updated := make([]uint, 0, len(u.updated)) + for n := range u.updated { + updated = append(updated, n) + } + sort.Slice(updated, func(i, j int) bool { + return updated[i] < updated[j] + }) + + // It does not seem to be possible to upgrade a PDF file + // from trailer dictionaries to cross-reference streams, + // so keep continuity either way. + // + // (Downgrading from cross-reference streams using XRefStm would not + // create a true hybrid-reference file, although it should work.) + buf := bytes.NewBuffer(u.Document) + startXref := buf.Len() + 1 /* '\n' */ + if typ, _ := u.Trailer["Type"]; typ.Kind == Name && typ.String == "XRef" { + u.flushXRefStm(updated, buf) + } else { + u.flushXRefTable(updated, buf) + } - fmt.Fprintf(buf, "trailer\n%s\nstartxref\n%d\n%%%%EOF\n", - trailer.Serialize(), startXref) + fmt.Fprintf(buf, "\nstartxref\n%d\n%%%%EOF\n", startXref) u.Document = buf.Bytes() + u.updated = make(map[uint]struct{}) + + u.Trailer["Prev"] = NewNumeric(float64(startXref)) } // ----------------------------------------------------------------------------- @@ -904,17 +1291,47 @@ func NewDate(ts time.Time) Object { return NewString(string(buf)) } +// GetStreamData returns the actual data stored in a stream object, +// applying any filters. +func (u *Updater) GetStreamData(stream Object) ([]byte, error) { + if f, ok := stream.Dict["F"]; ok && f.Kind != Nil { + return nil, errors.New("stream data in other files are unsupported") + } + + // Support just enough to decode a common cross-reference stream. + if filter, ok := stream.Dict["Filter"]; !ok { + return stream.Stream, nil + } else if filter.Kind != Name || filter.String != "FlateDecode" { + return nil, errors.New("unsupported stream Filter") + } + + // TODO(p): Support << /Columns N /Predictor 12 >> + // which usually appears in files with cross-reference streams. + if parms, ok := stream.Dict["DecodeParms"]; ok && parms.Kind != Nil { + return nil, errors.New("DecodeParms are not supported") + } + + r, err := zlib.NewReader(bytes.NewReader(stream.Stream)) + if err != nil { + return nil, err + } + + var b bytes.Buffer + _, err = b.ReadFrom(r) + return b.Bytes(), err +} + // GetFirstPage retrieves the first page of the given page (sub)tree reference, // or returns a Nil object if unsuccessful. -func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object { - obj, _ := u.Get(nodeN, nodeGeneration) - if obj.Kind != Dict { +func (u *Updater) GetFirstPage(node Object) Object { + obj, err := u.Dereference(node) + if err != nil || obj.Kind != Dict { return New(Nil) } // Out of convenience; these aren't filled normally. - obj.N = nodeN - obj.Generation = nodeGeneration + obj.N = node.N + obj.Generation = node.Generation if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name { return New(Nil) @@ -934,7 +1351,7 @@ func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object { } // XXX: Nothing prevents us from recursing in an evil circular graph. 
- return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation) + return u.GetFirstPage(kids.Array[0]) } // ----------------------------------------------------------------------------- @@ -1128,7 +1545,10 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate, if !ok || rootRef.Kind != Reference { return nil, errors.New("trailer does not contain a reference to Root") } - root, _ := pdf.Get(rootRef.N, rootRef.Generation) + root, err := pdf.Dereference(rootRef) + if err != nil { + return nil, fmt.Errorf("Root dictionary retrieval failed: %s", err) + } if root.Kind != Dict { return nil, errors.New("invalid Root dictionary reference") } @@ -1182,7 +1602,7 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate, if !ok || pagesRef.Kind != Reference { return nil, errors.New("invalid Pages reference") } - page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation) + page := pdf.GetFirstPage(pagesRef) if page.Kind != Dict { return nil, errors.New("invalid or unsupported page tree") } @@ -1204,7 +1624,7 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate, }) // 8.6.1 Interactive Form Dictionary - if _, ok := root.Dict["AcroForm"]; ok { + if acroform, ok := root.Dict["AcroForm"]; ok && acroform.Kind != Nil { return nil, errors.New("the document already contains forms, " + "they would be overwritten") } @@ -11,11 +11,15 @@ mkdir tmp # Create documents in various tools log "Creating source documents" -inkscape --pipe --export-filename=tmp/cairo.pdf <<'EOF' 2>/dev/null || : +inkscape --pipe --export-filename=tmp/cairo.pdf --export-pdf-version=1.4 \ +<<'EOF' 2>/dev/null || : <svg xmlns="http://www.w3.org/2000/svg"><text x="5" y="10">Hello</text></svg> EOF -date | tee tmp/lowriter.txt | groff -T pdf > tmp/groff.pdf || : +date > tmp/lowriter.txt +if command -v gropdf >/dev/null +then groff -T pdf < tmp/lowriter.txt > tmp/groff.pdf +fi lowriter --convert-to pdf tmp/lowriter.txt --outdir tmp >/dev/null || : convert rose: tmp/imagemagick.pdf || : @@ -45,7 +49,11 @@ openssl x509 -req -in tmp/cert.csr -out tmp/cert.pem \ -CA tmp/ca.cert.pem -CAkey tmp/ca.key.pem -set_serial 1 \ -extensions smime -extfile tmp/cert.cfg 2>/dev/null openssl verify -CAfile tmp/ca.cert.pem tmp/cert.pem >/dev/null + +# The second line accomodates the Go signer, +# which doesn't support SHA-256 within pkcs12 handling openssl pkcs12 -inkey tmp/key.pem -in tmp/cert.pem \ + -certpbe PBE-SHA1-3DES -keypbe PBE-SHA1-3DES -macalg sha1 \ -export -passout pass: -out tmp/key-pair.p12 for tool in "$@"; do @@ -55,6 +63,12 @@ for tool in "$@"; do result=${source%.pdf}.signed.pdf $tool "$source" "$result" tmp/key-pair.p12 "" pdfsig -nssdir sql:tmp/nssdir "$result" | grep Validation + + # Only some of our generators use PDF versions higher than 1.5 + log "Testing $tool for version detection" + grep -q "/Version /1[.]6" "$result" \ + || grep -q "^%PDF-1[.][67]" "$result" \ + || die "Version detection seems to misbehave (no upgrade)" done log "Testing $tool for expected failures" @@ -63,11 +77,6 @@ for tool in "$@"; do $tool -r 1 "$source" "$source.fail.pdf" tmp/key-pair.p12 "" \ && die "Too low reservations shouldn't succeed" - # Our generators do not use PDF versions higher than 1.5 - log "Testing $tool for version detection" - grep -q "/Version /1.6" "$result" \ - || die "Version detection seems to misbehave (no upgrade)" - sed '1s/%PDF-1../%PDF-1.7/' "$source" > "$source.alt" $tool "$source.alt" "$result.alt" tmp/key-pair.p12 "" grep -q "/Version /1.6" "$result.alt" \ |
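The heart of the Go-side changes is cross-reference stream support: the new loadXrefStream path reduces an xref stream to fixed-width, big-endian fields whose widths come from the stream's /W array, with an absent first (type) field defaulting to type 1. Below is a minimal, self-contained sketch of just that decoding step; xrefEntry and decodeEntries are illustrative names rather than part of pdf/pdf.go:

----
package main

import "fmt"

// xrefEntry mirrors the three per-entry fields of ISO 32000 cross-reference
// streams; it is an illustrative type, not one the package exports.
type xrefEntry struct {
	kind           uint   // 0 = free, 1 = stored in the file, 2 = stored in an object stream
	field2, field3 uint64 // meaning depends on kind: offset/object number, generation/index
}

// decodeEntries splits decompressed cross-reference stream data into entries,
// reading big-endian fields of the widths given by the stream's /W array.
func decodeEntries(data []byte, w [3]uint) ([]xrefEntry, error) {
	unit := int(w[0] + w[1] + w[2])
	if w[1] == 0 || len(data)%unit != 0 {
		return nil, fmt.Errorf("%d bytes do not divide into %d-byte entries", len(data), unit)
	}
	read := func(width uint) (n uint64) {
		for ; width > 0; width-- {
			n = n<<8 | uint64(data[0])
			data = data[1:]
		}
		return n
	}
	var entries []xrefEntry
	for len(data) > 0 {
		e := xrefEntry{kind: 1} // an absent type field defaults to type 1
		if w[0] > 0 {
			e.kind = uint(read(w[0]))
		}
		e.field2, e.field3 = read(w[1]), read(w[2])
		entries = append(entries, e)
	}
	return entries, nil
}

func main() {
	// /W [1 2 1]: a free entry, then an in-file entry at offset 0x1234, generation 0.
	data := []byte{0, 0x00, 0x00, 0xff, 1, 0x12, 0x34, 0x00}
	fmt.Println(decodeEntries(data, [3]uint{1, 2, 1}))
}
----

Type 2 entries are the ones that live inside object streams, which is why the ref bookkeeping grows a compressed field and Get() learns to defer to getFromObjStm().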
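GetStreamData deliberately covers only the common case those streams need: data stored in the file itself, no DecodeParms, and the FlateDecode filter, which is zlib-wrapped DEFLATE and therefore maps straight onto compress/zlib. A rough sketch of that path, with a hypothetical inflate helper and a round trip for demonstration:

----
package main

import (
	"bytes"
	"compress/zlib"
	"fmt"
	"io"
	"log"
)

// inflate corresponds to the FlateDecode branch: PDF's FlateDecode data
// carries a zlib header, so the standard library reader handles it as-is.
func inflate(raw []byte) ([]byte, error) {
	r, err := zlib.NewReader(bytes.NewReader(raw))
	if err != nil {
		return nil, err
	}
	defer r.Close()
	return io.ReadAll(r)
}

func main() {
	// Compress a small payload first, then decode it the way a stream would be.
	var compressed bytes.Buffer
	w := zlib.NewWriter(&compressed)
	w.Write([]byte("BT /F1 12 Tf (Hello) Tj ET"))
	w.Close()

	data, err := inflate(compressed.Bytes())
	if err != nil {
		log.Fatalln(err)
	}
	fmt.Printf("%s\n", data)
}
----

Cross-reference streams produced with << /Columns N /Predictor 12 >> would additionally need PNG predictor reversal, which is what the remaining TODO in GetStreamData refers to.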
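Put together, the exported additions (NewUpdater, ListIndirect, Get, GetStreamData) are enough to walk a document and pull out every stream. The following is a hedged usage sketch, not code from this repository; the import path is assumed from the module name:

----
package main

import (
	"fmt"
	"log"
	"os"

	"janouch.name/pdf-simple-sign/pdf"
)

func main() {
	document, err := os.ReadFile(os.Args[1])
	if err != nil {
		log.Fatalln(err)
	}
	updater, err := pdf.NewUpdater(document)
	if err != nil {
		log.Fatalln(err)
	}
	// ListIndirect returns Reference objects for every non-free xref entry,
	// including ones that now resolve through object streams.
	for _, ref := range updater.ListIndirect() {
		object, err := updater.Get(ref.N, ref.Generation)
		if err != nil || object.Kind != pdf.Stream {
			continue
		}
		data, err := updater.GetStreamData(object)
		if err != nil {
			log.Printf("object %d: %s", ref.N, err)
			continue
		}
		fmt.Printf("object %d: %d bytes of stream data\n", ref.N, len(data))
	}
}
----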