-rw-r--r--   LICENSE                    2
-rw-r--r--   README.adoc               32
-rw-r--r--   cmd/extfs-pdf/main.go     25
-rw-r--r--   go.mod                     4
-rw-r--r--   go.sum                     4
-rw-r--r--   pdf-simple-sign.adoc       2
-rw-r--r--   pdf/pdf.go               442
-rwxr-xr-x   test.sh                   23
8 files changed, 464 insertions, 70 deletions
diff --git a/LICENSE b/LICENSE
index 5e342a0..7511f3e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2017 - 2021, Přemysl Eric Janouch <p@janouch.name>
+Copyright (c) 2017 - 2024, Přemysl Eric Janouch <p@janouch.name>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
diff --git a/README.adoc b/README.adoc
index 50dc9e6..10e581f 100644
--- a/README.adoc
+++ b/README.adoc
@@ -2,11 +2,19 @@ pdf-simple-sign
===============
'pdf-simple-sign' is a simple PDF signer intended for documents produced by
-the Cairo library, GNU troff, ImageMagick, or similar.
+the Cairo library (≤ 1.17.4 or using PDF 1.4), GNU troff, ImageMagick,
+or similar.
I don't aim to extend the functionality any further. The project is fairly
self-contained and it should be easy to grasp and change to suit your needs.
+Packages
+--------
+Regular releases are sporadic. git master should be stable enough.
+You can get a package with the latest development version using Arch Linux's
+https://aur.archlinux.org/packages/pdf-simple-sign-git[AUR],
+or as a https://git.janouch.name/p/nixexprs[Nix derivation].
+
Documentation
-------------
See the link:pdf-simple-sign.adoc[man page] for information about usage.
@@ -25,14 +33,28 @@ Runtime dependencies: libcrypto (OpenSSL 1.1 API)
$ cd builddir
$ ninja
-In addition to the C++ version, also included is a native Go port:
+In addition to the C++ version, also included is a native Go port,
+which has enhanced PDF 1.5 support:
- $ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign
+----
+$ go install janouch.name/pdf-simple-sign/cmd/pdf-simple-sign@master
+----
-And a crude external VFS for Midnight Commander, that may be used to extract
+and a crude external VFS for Midnight Commander, which may be used to extract
all streams from a given PDF file:
- $ go get janouch.name/pdf-simple-sign/cmd/extfs-pdf
+----
+$ GOBIN=$HOME/.local/share/mc/extfs.d \
+ go install janouch.name/pdf-simple-sign/cmd/extfs-pdf@master
+----
+
+To enable the VFS, edit your _~/.config/mc/mc.ext.ini_ to contain:
+
+----
+[pdf]
+Type=^PDF
+Open=%cd %p/extfs-pdf://
+----
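
For illustration, the helper can also be driven by hand using the standard extfs
command protocol; the document and object names below are made up, and the
object names follow the n<N>g<GENERATION> scheme used in listings:

----
$ ~/.local/share/mc/extfs.d/extfs-pdf list document.pdf
$ ~/.local/share/mc/extfs.d/extfs-pdf copyout document.pdf n3g0.zz stream.zz
----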
Contributing and Support
------------------------
diff --git a/cmd/extfs-pdf/main.go b/cmd/extfs-pdf/main.go
index 74f8433..eab3e2b 100644
--- a/cmd/extfs-pdf/main.go
+++ b/cmd/extfs-pdf/main.go
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2021 - 2024, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
@@ -21,6 +21,7 @@ import (
"flag"
"fmt"
"os"
+ "time"
"janouch.name/pdf-simple-sign/pdf"
)
@@ -45,6 +46,8 @@ func streamSuffix(o *pdf.Object) string {
return "jp2"
case "DCTDecode":
return "jpg"
+ case "FlateDecode":
+ return "zz"
default:
return filter.String
}
@@ -52,7 +55,8 @@ func streamSuffix(o *pdf.Object) string {
return "stream"
}
-func list(updater *pdf.Updater) {
+func list(mtime time.Time, updater *pdf.Updater) {
+ stamp := mtime.Local().Format("01-02-2006 15:04:05")
for _, o := range updater.ListIndirect() {
object, err := updater.Get(o.N, o.Generation)
size := 0
@@ -62,11 +66,11 @@ func list(updater *pdf.Updater) {
// Accidental transformation, retrieving original data is more work.
size = len(object.Serialize())
}
- fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d\n",
- size, o.N, o.Generation)
+ fmt.Printf("-r--r--r-- 1 0 0 %d %s n%dg%d\n",
+ size, stamp, o.N, o.Generation)
if object.Kind == pdf.Stream {
- fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d.%s\n",
- len(object.Stream), o.N, o.Generation, streamSuffix(&object))
+ fmt.Printf("-r--r--r-- 1 0 0 %d %s n%dg%d.%s\n", len(object.Stream),
+ stamp, o.N, o.Generation, streamSuffix(&object))
}
}
}
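
For illustration, a listing in the new format might read as follows (sizes and
timestamp invented); each object gets one line for its serialized form and, if
it is a stream, a second line for the raw stream data with a filter-derived
suffix:

----
-r--r--r-- 1 0 0 58 06-30-2024 12:00:00 n3g0
-r--r--r-- 1 0 0 1024 06-30-2024 12:00:00 n3g0.zz
----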
@@ -76,7 +80,7 @@ func copyout(updater *pdf.Updater, storedFilename, extractTo string) {
n, generation uint
suffix string
)
- m, err := fmt.Sscanf(storedFilename, "%d-%d%s", &n, &generation, &suffix)
+ m, err := fmt.Sscanf(storedFilename, "n%dg%d%s", &n, &generation, &suffix)
if m < 2 {
die(3, "%s: %s", storedFilename, err)
}
@@ -108,6 +112,11 @@ func main() {
die(1, "%s", err)
}
+ mtime := time.UnixMilli(0)
+ if info, err := os.Stat(documentPath); err == nil {
+ mtime = info.ModTime()
+ }
+
updater, err := pdf.NewUpdater(doc)
if err != nil {
die(2, "%s", err)
@@ -120,7 +129,7 @@ func main() {
if flag.NArg() != 2 {
usage()
} else {
- list(updater)
+ list(mtime, updater)
}
case "copyout":
if flag.NArg() != 4 {
diff --git a/go.mod b/go.mod
index 18b68b6..0e84ffc 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,6 @@ module janouch.name/pdf-simple-sign
go 1.17
require (
- go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1
- golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de
+ go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352
+ golang.org/x/crypto v0.10.0
)
diff --git a/go.sum b/go.sum
index a187090..4cf11b0 100644
--- a/go.sum
+++ b/go.sum
@@ -1,8 +1,12 @@
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M=
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
+go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 h1:CCriYyAfq1Br1aIYettdHZTy8mBTIPo7We18TuO/bak=
+go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
+golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/pdf-simple-sign.adoc b/pdf-simple-sign.adoc
index 491fa64..4ab1bc5 100644
--- a/pdf-simple-sign.adoc
+++ b/pdf-simple-sign.adoc
@@ -14,7 +14,7 @@ Synopsis
Description
-----------
-'pdf-simple-sign' is a simple PDF signer intended for documents produced by
+*pdf-simple-sign* is a simple PDF signer intended for documents produced by
the Cairo library, GNU troff, ImageMagick, or similar. As such, it currently
comes with some restrictions:
diff --git a/pdf/pdf.go b/pdf/pdf.go
index 92d18e9..1fcdaa4 100644
--- a/pdf/pdf.go
+++ b/pdf/pdf.go
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2018 - 2021, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2018 - 2024, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
@@ -18,6 +18,8 @@ package pdf
import (
"bytes"
+ "compress/zlib"
+ "encoding/binary"
"encoding/hex"
"errors"
"fmt"
@@ -120,6 +122,13 @@ func NewDict(d map[string]Object) Object {
return Object{Kind: Dict, Dict: d}
}
+func NewStream(d map[string]Object, s []byte) Object {
+ if d == nil {
+ d = make(map[string]Object)
+ }
+ return Object{Kind: Stream, Dict: d, Stream: s}
+}
+
func NewIndirect(o Object, n, generation uint) Object {
return Object{Kind: Indirect, N: n, Generation: generation,
Array: []Object{o}}
@@ -477,8 +486,9 @@ func (o *Object) Serialize() string {
// -----------------------------------------------------------------------------
type ref struct {
- offset int64 // file offset or N of the next free entry
+ offset int64 // file offset, or N of the next free entry, or index
generation uint // object generation
+ compressed *uint // PDF 1.5: N of the containing compressed object
nonfree bool // whether this N is taken (for a good zero value)
}
@@ -671,16 +681,159 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
}
}
-func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
+func (u *Updater) loadXrefEntry(
+ n uint, r ref, loadedEntries map[uint]struct{}) {
+ if _, ok := loadedEntries[n]; ok {
+ return
+ }
+ if lenXref := uint(len(u.xref)); n >= lenXref {
+ u.xref = append(u.xref, make([]ref, n-lenXref+1)...)
+ }
+ loadedEntries[n] = struct{}{}
+
+ u.xref[n] = r
+}
+
+func (u *Updater) loadXrefStream(
+ lex *Lexer, stack []Object, loadedEntries map[uint]struct{}) (
+ Object, error) {
+ var object Object
+ for {
+ var err error
+ if object, err = u.parse(lex, &stack); err != nil {
+ return New(End), fmt.Errorf("invalid xref table: %s", err)
+ } else if object.Kind == End {
+ return newError("invalid xref table")
+ }
+
+ // For the sake of simplicity, keep stacking until we find an object.
+ if object.Kind == Indirect {
+ break
+ }
+
+ stack = append(stack, object)
+ }
+
+ // ISO 32000-2:2020 7.5.8.2 Cross-reference stream dictionary
+ stream := object.Array[0]
+ if stream.Kind != Stream {
+ return newError("invalid xref table")
+ }
+ if typ, ok := stream.Dict["Type"]; !ok ||
+ typ.Kind != Name || typ.String != "XRef" {
+ return newError("invalid xref stream")
+ }
+
+ data, err := u.GetStreamData(stream)
+ if err != nil {
+ return New(End), fmt.Errorf("invalid xref stream: %s", err)
+ }
+
+ size, ok := stream.Dict["Size"]
+ if !ok || !size.IsUint() || size.Number <= 0 {
+ return newError("invalid or missing cross-reference stream Size")
+ }
+
+ type pair struct{ start, count uint }
+ pairs := []pair{}
+ if index, ok := stream.Dict["Index"]; !ok {
+ pairs = append(pairs, pair{0, uint(size.Number)})
+ } else {
+ if index.Kind != Array || len(index.Array)%2 != 0 {
+ return newError("invalid cross-reference stream Index")
+ }
+
+ a := index.Array
+ for i := 0; i < len(a); i += 2 {
+ if !a[i].IsUint() || !a[i+1].IsUint() {
+ return newError("invalid cross-reference stream Index")
+ }
+ pairs = append(pairs, pair{uint(a[i].Number), uint(a[i+1].Number)})
+ }
+ }
+
+ w, ok := stream.Dict["W"]
+ if !ok || w.Kind != Array || len(w.Array) != 3 ||
+ !w.Array[0].IsUint() || !w.Array[1].IsUint() || !w.Array[2].IsUint() {
+ return newError("invalid or missing cross-reference stream W")
+ }
+
+ w1 := uint(w.Array[0].Number)
+ w2 := uint(w.Array[1].Number)
+ w3 := uint(w.Array[2].Number)
+ if w2 == 0 {
+ return newError("invalid cross-reference stream W")
+ }
+
+ unit := w1 + w2 + w3
+ if uint(len(data))%unit != 0 {
+ return newError("invalid cross-reference stream length")
+ }
+
+ readField := func(data []byte, width uint) (uint, []byte) {
+ var n uint
+ for ; width != 0; width-- {
+ n = n<<8 | uint(data[0])
+ data = data[1:]
+ }
+ return n, data
+ }
+
+ // ISO 32000-2:2020 7.5.8.3 Cross-reference stream data
+ for _, pair := range pairs {
+ for i := uint(0); i < pair.count; i++ {
+ if uint(len(data)) < unit {
+ return newError("premature cross-reference stream EOF")
+ }
+
+ var f1, f2, f3 uint = 1, 0, 0
+ if w1 > 0 {
+ f1, data = readField(data, w1)
+ }
+ f2, data = readField(data, w2)
+ if w3 > 0 {
+ f3, data = readField(data, w3)
+ }
+
+ var r ref
+ switch f1 {
+ case 0:
+ r.offset = int64(f2)
+ r.generation = f3
+ case 1:
+ r.offset = int64(f2)
+ r.generation = f3
+ r.nonfree = true
+ case 2:
+ r.offset = int64(f3)
+ r.compressed = &f2
+ r.nonfree = true
+ default:
+ // TODO(p): It should be treated as a reference to
+ // the null object. We can't currently represent that.
+ return newError("unsupported cross-reference stream contents")
+ }
+
+ u.loadXrefEntry(pair.start+i, r, loadedEntries)
+ }
+ }
+
+ stream.Kind = Dict
+ stream.Stream = nil
+ return stream, nil
+}
+
+func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) (
+ Object, error) {
var throwawayStack []Object
- if keyword, _ := u.parse(lex,
- &throwawayStack); keyword.Kind != Keyword || keyword.String != "xref" {
- return errors.New("invalid xref table")
+ if object, _ := u.parse(lex,
+ &throwawayStack); object.Kind != Keyword || object.String != "xref" {
+ return u.loadXrefStream(lex, []Object{object}, loadedEntries)
}
for {
object, _ := u.parse(lex, &throwawayStack)
if object.Kind == End {
- return errors.New("unexpected EOF while looking for the trailer")
+ return newError("unexpected EOF while looking for the trailer")
}
if object.Kind == Keyword && object.String == "trailer" {
break
@@ -688,7 +841,7 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
second, _ := u.parse(lex, &throwawayStack)
if !object.IsUint() || !second.IsUint() {
- return errors.New("invalid xref section header")
+ return newError("invalid xref section header")
}
start, count := uint(object.Number), uint(second.Number)
@@ -700,33 +853,29 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
off.Number > float64(len(u.Document)) ||
!gen.IsInteger() || gen.Number < 0 || gen.Number > 65535 ||
key.Kind != Keyword {
- return errors.New("invalid xref entry")
+ return newError("invalid xref entry")
}
free := true
if key.String == "n" {
free = false
} else if key.String != "f" {
- return errors.New("invalid xref entry")
- }
-
- n := start + i
- if _, ok := loadedEntries[n]; ok {
- continue
- }
- if lenXref := uint(len(u.xref)); n >= lenXref {
- u.xref = append(u.xref, make([]ref, n-lenXref+1)...)
+ return newError("invalid xref entry")
}
- loadedEntries[n] = struct{}{}
- u.xref[n] = ref{
+ u.loadXrefEntry(start+i, ref{
offset: int64(off.Number),
generation: uint(gen.Number),
nonfree: !free,
- }
+ }, loadedEntries)
}
}
- return nil
+
+ trailer, _ := u.parse(lex, &throwawayStack)
+ if trailer.Kind != Dict {
+ return newError("invalid trailer dictionary")
+ }
+ return trailer, nil
}
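
As a stand-alone sketch of the decoding above (not part of the patch): with
W = [1 2 1] each entry occupies four bytes, the first giving the entry type,
the next two the offset or object-stream number, and the last the generation
or index. The byte values here are invented:

----
package main

import "fmt"

// decodeEntry unpacks one cross-reference stream entry whose field widths
// are given by w, reading each field as a big-endian unsigned integer.
// A zero-width first field defaults to type 1, as in loadXrefStream above.
func decodeEntry(data []byte, w [3]uint) (fields [3]uint, rest []byte) {
	fields[0] = 1
	for i := range w {
		if w[i] == 0 {
			continue
		}
		var n uint
		for width := w[i]; width != 0; width-- {
			n = n<<8 | uint(data[0])
			data = data[1:]
		}
		fields[i] = n
	}
	return fields, data
}

func main() {
	// 02 001A 05 -> type 2: compressed, in object stream 26 at index 5
	// 01 0400 00 -> type 1: uncompressed, at file offset 1024, generation 0
	data := []byte{0x02, 0x00, 0x1A, 0x05, 0x01, 0x04, 0x00, 0x00}
	for len(data) > 0 {
		var f [3]uint
		f, data = decodeEntry(data, [3]uint{1, 2, 1})
		fmt.Println(f) // [2 26 5], then [1 1024 0]
	}
}
----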
// -----------------------------------------------------------------------------
@@ -756,7 +905,6 @@ func NewUpdater(document []byte) (*Updater, error) {
loadedXrefs := make(map[int64]struct{})
loadedEntries := make(map[uint]struct{})
- var throwawayStack []Object
for {
if _, ok := loadedXrefs[xrefOffset]; ok {
return nil, errors.New("circular xref offsets")
@@ -766,19 +914,21 @@ func NewUpdater(document []byte) (*Updater, error) {
}
lex := Lexer{u.Document[xrefOffset:]}
- if err := u.loadXref(&lex, loadedEntries); err != nil {
+ trailer, err := u.loadXref(&lex, loadedEntries)
+ if err != nil {
return nil, err
}
- trailer, _ := u.parse(&lex, &throwawayStack)
- if trailer.Kind != Dict {
- return nil, errors.New("invalid trailer dictionary")
- }
if len(loadedXrefs) == 0 {
u.Trailer = trailer.Dict
}
loadedXrefs[xrefOffset] = struct{}{}
+ // TODO(p): Descend into XRefStm here first, if present,
+ // which is also a linked list.
+
+ // We allow for mixed cross-reference tables and streams
+ // within a single Prev list, although this should never occur.
prevOffset, ok := trailer.Dict["Prev"]
if !ok {
break
@@ -825,6 +975,100 @@ func (u *Updater) Version(root *Object) int {
return 0
}
+func (u *Updater) getFromObjStm(nObjStm, n uint) (Object, error) {
+ if nObjStm == n {
+ return newError("ObjStm recursion")
+ }
+
+ stream, err := u.Get(nObjStm, 0)
+ if err != nil {
+ return stream, err
+ }
+ if stream.Kind != Stream {
+ return newError("invalid ObjStm")
+ }
+ if typ, ok := stream.Dict["Type"]; !ok ||
+ typ.Kind != Name || typ.String != "ObjStm" {
+ return newError("invalid ObjStm")
+ }
+
+ data, err := u.GetStreamData(stream)
+ if err != nil {
+ return New(End), fmt.Errorf("invalid ObjStm: %s", err)
+ }
+ entryN, ok := stream.Dict["N"]
+ if !ok || !entryN.IsUint() || entryN.Number <= 0 {
+ return newError("invalid ObjStm N")
+ }
+ entryFirst, ok := stream.Dict["First"]
+ if !ok || !entryFirst.IsUint() || entryFirst.Number <= 0 {
+ return newError("invalid ObjStm First")
+ }
+
+ // NOTE: This means descending into that stream if n is not found here.
+ // It is meant to be an object reference.
+ if extends, ok := stream.Dict["Extends"]; ok && extends.Kind != Nil {
+ return newError("ObjStm extensions are unsupported")
+ }
+
+ count := uint(entryN.Number)
+ first := uint(entryFirst.Number)
+ if first > uint(len(data)) {
+ return newError("invalid ObjStm First")
+ }
+
+ lex1 := Lexer{data[:first]}
+ data = data[first:]
+
+ type pair struct{ n, offset uint }
+ pairs := []pair{}
+ for i := uint(0); i < count; i++ {
+ var throwawayStack []Object
+ objN, _ := u.parse(&lex1, &throwawayStack)
+ objOffset, _ := u.parse(&lex1, &throwawayStack)
+ if !objN.IsUint() || !objOffset.IsUint() {
+ return newError("invalid ObjStm pairs")
+ }
+ pairs = append(pairs, pair{uint(objN.Number), uint(objOffset.Number)})
+ }
+ for i, pair := range pairs {
+ if pair.offset > uint(len(data)) ||
+ i > 0 && pairs[i-1].offset >= pair.offset {
+ return newError("invalid ObjStm pairs")
+ }
+ }
+
+ for i, pair := range pairs {
+ if pair.n != n {
+ continue
+ }
+
+ if i+1 < len(pairs) {
+ data = data[pair.offset:pairs[i+1].offset]
+ } else {
+ data = data[pair.offset:]
+ }
+
+ lex2 := Lexer{data}
+ var stack []Object
+ for {
+ object, err := u.parse(&lex2, &stack)
+ if err != nil {
+ return object, err
+ } else if object.Kind == End {
+ break
+ } else {
+ stack = append(stack, object)
+ }
+ }
+ if len(stack) == 0 {
+ return newError("empty ObjStm object")
+ }
+ return stack[0], nil
+ }
+ return newError("object not found in ObjStm")
+}
+
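
For context, a decompressed ObjStm body that getFromObjStm() walks starts with
/N pairs of object number and offset (relative to /First), followed by the
objects themselves. A hand-made example with two objects, whose stream
dictionary would carry << /Type /ObjStm /N 2 /First 10 >>; all numbers are
illustrative:

----
7 0 11 11
<< /A 1 >>
<< /B 2 >>
----

Here object 7 starts at offset 0 and object 11 at offset 11, both measured from
the end of the 10-byte pair section.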
// Get retrieves an object by its number and generation--may return
// Nil or End with an error.
func (u *Updater) Get(n, generation uint) (Object, error) {
@@ -833,8 +1077,13 @@ func (u *Updater) Get(n, generation uint) (Object, error) {
}
ref := u.xref[n]
- if !ref.nonfree || ref.generation != generation ||
- ref.offset >= int64(len(u.Document)) {
+ if !ref.nonfree || ref.generation != generation {
+ return New(Nil), nil
+ }
+
+ if ref.compressed != nil {
+ return u.getFromObjStm(*ref.compressed, n)
+ } else if ref.offset >= int64(len(u.Document)) {
return New(Nil), nil
}
@@ -889,8 +1138,8 @@ type BytesWriter interface {
WriteString(s string) (n int, err error)
}
-// Update appends an updated object to the end of the document. The fill
-// callback must write exactly one PDF object.
+// Update appends an updated object to the end of the document.
+// The fill callback must write exactly one PDF object.
func (u *Updater) Update(n uint, fill func(buf BytesWriter)) {
oldRef := u.xref[n]
u.updated[n] = struct{}{}
@@ -910,20 +1159,62 @@ func (u *Updater) Update(n uint, fill func(buf BytesWriter)) {
u.Document = buf.Bytes()
}
-// FlushUpdates writes an updated cross-reference table and trailer.
-func (u *Updater) FlushUpdates() {
- updated := make([]uint, 0, len(u.updated))
- for n := range u.updated {
- updated = append(updated, n)
+func (u *Updater) flushXRefStm(updated []uint, buf *bytes.Buffer) {
+ // The cross-reference stream has to point to itself.
+ // XXX: We only duplicate Update code here due to how we currently buffer.
+ n := u.Allocate()
+ updated = append(updated, n)
+
+ u.updated[n] = struct{}{}
+ u.xref[n] = ref{
+ offset: int64(buf.Len() + 1),
+ generation: 0,
+ nonfree: true,
}
- sort.Slice(updated, func(i, j int) bool {
- return updated[i] < updated[j]
+
+ index, b := []Object{}, []byte{}
+ write := func(f1 byte, f2, f3 uint64) {
+ b = append(b, f1)
+ b = binary.BigEndian.AppendUint64(b, f2)
+ b = binary.BigEndian.AppendUint64(b, f3)
+ }
+ for i := 0; i < len(updated); {
+ start, stop := updated[i], updated[i]+1
+ for i++; i < len(updated) && updated[i] == stop; i++ {
+ stop++
+ }
+
+ index = append(index,
+ NewNumeric(float64(start)), NewNumeric(float64(stop-start)))
+ for ; start < stop; start++ {
+ ref := u.xref[start]
+ if ref.compressed != nil {
+ write(2, uint64(*ref.compressed), uint64(ref.offset))
+ } else if ref.nonfree {
+ write(1, uint64(ref.offset), uint64(ref.generation))
+ } else {
+ write(0, uint64(ref.offset), uint64(ref.generation))
+ }
+ }
+ }
+
+ u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
+ u.Trailer["Index"] = NewArray(index)
+ u.Trailer["W"] = NewArray([]Object{
+ NewNumeric(1), NewNumeric(8), NewNumeric(8),
})
- buf := bytes.NewBuffer(u.Document)
- startXref := buf.Len() + 1
- buf.WriteString("\nxref\n")
+ for _, key := range []string{
+ "Filter", "DecodeParms", "F", "FFilter", "FDecodeParms", "DL"} {
+ delete(u.Trailer, key)
+ }
+ stream := NewStream(u.Trailer, b)
+ fmt.Fprintf(buf, "\n%d 0 obj\n%s\nendobj", n, stream.Serialize())
+}
+
+func (u *Updater) flushXRefTable(updated []uint, buf *bytes.Buffer) {
+ buf.WriteString("\nxref\n")
for i := 0; i < len(updated); {
start, stop := updated[i], updated[i]+1
for i++; i < len(updated) && updated[i] == stop; i++ {
@@ -932,8 +1223,9 @@ func (u *Updater) FlushUpdates() {
fmt.Fprintf(buf, "%d %d\n", start, stop-start)
for ; start < stop; start++ {
+ // XXX: We should warn about any object streams here.
ref := u.xref[start]
- if ref.nonfree {
+ if ref.nonfree && ref.compressed == nil {
fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation)
} else {
fmt.Fprintf(buf, "%010d %05d f \n", ref.offset, ref.generation)
@@ -950,10 +1242,38 @@ func (u *Updater) FlushUpdates() {
u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
trailer := NewDict(u.Trailer)
+ fmt.Fprintf(buf, "trailer\n%s", trailer.Serialize())
+}
- fmt.Fprintf(buf, "trailer\n%s\nstartxref\n%d\n%%%%EOF\n",
- trailer.Serialize(), startXref)
+// FlushUpdates writes an updated cross-reference table and trailer, or stream.
+func (u *Updater) FlushUpdates() {
+ updated := make([]uint, 0, len(u.updated))
+ for n := range u.updated {
+ updated = append(updated, n)
+ }
+ sort.Slice(updated, func(i, j int) bool {
+ return updated[i] < updated[j]
+ })
+
+ // It does not seem to be possible to upgrade a PDF file
+ // from trailer dictionaries to cross-reference streams,
+ // so keep continuity either way.
+ //
+ // (Downgrading from cross-reference streams using XRefStm would not
+ // create a true hybrid-reference file, although it should work.)
+ buf := bytes.NewBuffer(u.Document)
+ startXref := buf.Len() + 1 /* '\n' */
+ if typ, _ := u.Trailer["Type"]; typ.Kind == Name && typ.String == "XRef" {
+ u.flushXRefStm(updated, buf)
+ } else {
+ u.flushXRefTable(updated, buf)
+ }
+
+ fmt.Fprintf(buf, "\nstartxref\n%d\n%%%%EOF\n", startXref)
u.Document = buf.Bytes()
+ u.updated = make(map[uint]struct{})
+
+ u.Trailer["Prev"] = NewNumeric(float64(startXref))
}
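
Putting FlushUpdates() together: the incremental update appended to the
document is either a classic section or, when the original trailer already was
a cross-reference stream, an "N 0 obj … endobj" stream object, both followed by
the same startxref tail. A schematic sketch of the classic variant, with
made-up numbers:

----
xref
3 1
0000012345 00000 n 
trailer
<< /Prev 9876 /Root 1 0 R /Size 20 >>
startxref
12300
%%EOF
----

In the stream variant, the xref/trailer part is replaced by an indirect object
whose dictionary reuses the trailer keys (including /Type /XRef) plus the
freshly set /Size, /Index and /W [1 8 8], and whose stream data holds the
binary entries.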
// -----------------------------------------------------------------------------
@@ -971,6 +1291,36 @@ func NewDate(ts time.Time) Object {
return NewString(string(buf))
}
+// GetStreamData returns the actual data stored in a stream object,
+// applying any filters.
+func (u *Updater) GetStreamData(stream Object) ([]byte, error) {
+ if f, ok := stream.Dict["F"]; ok && f.Kind != Nil {
+ return nil, errors.New("stream data in other files are unsupported")
+ }
+
+ // Support just enough to decode a common cross-reference stream.
+ if filter, ok := stream.Dict["Filter"]; !ok {
+ return stream.Stream, nil
+ } else if filter.Kind != Name || filter.String != "FlateDecode" {
+ return nil, errors.New("unsupported stream Filter")
+ }
+
+ // TODO(p): Support << /Columns N /Predictor 12 >>
+ // which usually appears in files with cross-reference streams.
+ if parms, ok := stream.Dict["DecodeParms"]; ok && parms.Kind != Nil {
+ return nil, errors.New("DecodeParms are not supported")
+ }
+
+ r, err := zlib.NewReader(bytes.NewReader(stream.Stream))
+ if err != nil {
+ return nil, err
+ }
+
+ var b bytes.Buffer
+ _, err = b.ReadFrom(r)
+ return b.Bytes(), err
+}
+
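
A possible usage sketch from client code, assuming the object number is known,
that the log and os packages are imported, and with error handling abbreviated:

----
object, err := updater.Get(3, 0)
if err != nil {
	log.Fatalln(err)
}
if object.Kind == pdf.Stream {
	data, err := updater.GetStreamData(object)
	if err != nil {
		log.Fatalln(err)
	}
	os.Stdout.Write(data) // decompressed stream contents
}
----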
// GetFirstPage retrieves the first page of the given page (sub)tree reference,
// or returns a Nil object if unsuccessful.
func (u *Updater) GetFirstPage(node Object) Object {
@@ -1274,7 +1624,7 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
})
// 8.6.1 Interactive Form Dictionary
- if _, ok := root.Dict["AcroForm"]; ok {
+ if acroform, ok := root.Dict["AcroForm"]; ok && acroform.Kind != Nil {
return nil, errors.New("the document already contains forms, " +
"they would be overwritten")
}
diff --git a/test.sh b/test.sh
index f87d525..40bd165 100755
--- a/test.sh
+++ b/test.sh
@@ -11,11 +11,15 @@ mkdir tmp
# Create documents in various tools
log "Creating source documents"
-inkscape --pipe --export-filename=tmp/cairo.pdf <<'EOF' 2>/dev/null || :
+inkscape --pipe --export-filename=tmp/cairo.pdf --export-pdf-version=1.4 \
+<<'EOF' 2>/dev/null || :
<svg xmlns="http://www.w3.org/2000/svg"><text x="5" y="10">Hello</text></svg>
EOF
-date | tee tmp/lowriter.txt | groff -T pdf > tmp/groff.pdf || :
+date > tmp/lowriter.txt
+if command -v gropdf >/dev/null
+then groff -T pdf < tmp/lowriter.txt > tmp/groff.pdf
+fi
lowriter --convert-to pdf tmp/lowriter.txt --outdir tmp >/dev/null || :
convert rose: tmp/imagemagick.pdf || :
@@ -45,7 +49,11 @@ openssl x509 -req -in tmp/cert.csr -out tmp/cert.pem \
-CA tmp/ca.cert.pem -CAkey tmp/ca.key.pem -set_serial 1 \
-extensions smime -extfile tmp/cert.cfg 2>/dev/null
openssl verify -CAfile tmp/ca.cert.pem tmp/cert.pem >/dev/null
+
+# The second line accommodates the Go signer,
+# which doesn't support SHA-256 within pkcs12 handling
openssl pkcs12 -inkey tmp/key.pem -in tmp/cert.pem \
+ -certpbe PBE-SHA1-3DES -keypbe PBE-SHA1-3DES -macalg sha1 \
-export -passout pass: -out tmp/key-pair.p12
for tool in "$@"; do
@@ -55,6 +63,12 @@ for tool in "$@"; do
result=${source%.pdf}.signed.pdf
$tool "$source" "$result" tmp/key-pair.p12 ""
pdfsig -nssdir sql:tmp/nssdir "$result" | grep Validation
+
+ # Only some of our generators use PDF versions higher than 1.5
+ log "Testing $tool for version detection"
+ grep -q "/Version /1[.]6" "$result" \
+ || grep -q "^%PDF-1[.][67]" "$result" \
+ || die "Version detection seems to misbehave (no upgrade)"
done
log "Testing $tool for expected failures"
@@ -63,11 +77,6 @@ for tool in "$@"; do
$tool -r 1 "$source" "$source.fail.pdf" tmp/key-pair.p12 "" \
&& die "Too low reservations shouldn't succeed"
- # Our generators do not use PDF versions higher than 1.5
- log "Testing $tool for version detection"
- grep -q "/Version /1.6" "$result" \
- || die "Version detection seems to misbehave (no upgrade)"
-
sed '1s/%PDF-1../%PDF-1.7/' "$source" > "$source.alt"
$tool "$source.alt" "$result.alt" tmp/key-pair.p12 ""
grep -q "/Version /1.6" "$result.alt" \