aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Přemysl Eric Janouch <p@janouch.name> 2021-12-09 13:07:02 +0100
committer Přemysl Eric Janouch <p@janouch.name> 2021-12-09 14:07:14 +0100
commit 97ffe3d46ebc71097bbb83c738e39cf8fd64e9c8 (patch)
tree 718ee0295aee2eeacbd5b44da7d436f3db026d74
parent 1a3c7a8282049630a180d91b2e0b2ac82d8b876f (diff)
download pdf-simple-sign-97ffe3d46ebc71097bbb83c738e39cf8fd64e9c8.tar.gz
pdf-simple-sign-97ffe3d46ebc71097bbb83c738e39cf8fd64e9c8.tar.xz
pdf-simple-sign-97ffe3d46ebc71097bbb83c738e39cf8fd64e9c8.zip
Go: implement stream parsing/serialization
-rw-r--r-- pdf/pdf.go 66
1 files changed, 58 insertions, 8 deletions
diff --git a/pdf/pdf.go b/pdf/pdf.go
index 05905c0..120c238 100644
--- a/pdf/pdf.go
+++ b/pdf/pdf.go
@@ -59,6 +59,7 @@ const (
// higher-level objects
Array
Dict
+ Stream
Indirect
Reference
)
@@ -72,7 +73,8 @@ type Object struct {
String string // Comment/Keyword/Name/String
Number float64 // Bool, Numeric
Array []Object // Array, Indirect
- Dict map[string]Object // Dict, in the future also Stream
+ Dict map[string]Object // Dict, Stream
+ Stream []byte // Stream
N, Generation uint // Indirect, Reference
}
@@ -458,6 +460,10 @@ func (o *Object) Serialize() string {
fmt.Fprint(b, " /", k, " ", v.Serialize())
}
return "<<" + b.String() + " >>"
+ case Stream:
+ d := NewDict(o.Dict)
+ d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
+ return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
case Indirect:
return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
o.Array[0].Serialize())
@@ -497,6 +503,54 @@ type Updater struct {
Trailer map[string]Object
}
+// parseStream converts the dictionary on top of the stack into a Stream
+// object. It is called by parse once the "stream" keyword has been read.
+//
+// The dictionary is popped off the stack, its Length entry is resolved
+// through u.Dereference (presumably so that it may be an indirect reference),
+// and that many bytes are taken from lex.P as the stream data. The data must
+// be preceded by exactly one newline token, and the next object parsed after
+// it must be the "endstream" keyword.
+//
+// On success, it returns a copy of the dictionary object with Kind set
+// to Stream and Stream set to the data; the data is a view into the lexer's
+// buffer rather than a copy. Otherwise it returns an error describing
+// the first problem encountered.
+func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
+	lenStack := len(*stack)
+	if lenStack < 1 {
+		return newError("missing stream dictionary")
+	}
+	dict := (*stack)[lenStack-1]
+	if dict.Kind != Dict {
+		return newError("stream not preceded by a dictionary")
+	}
+
+	// The dictionary becomes part of the stream object, so take it off.
+	*stack = (*stack)[:lenStack-1]
+	length, ok := dict.Dict["Length"]
+	if !ok {
+		return newError("missing stream Length")
+	}
+	length, err := u.Dereference(length)
+	if err != nil {
+		return length, err
+	}
+	// The comparison must be inclusive: converted to float64, math.MaxInt
+	// rounds up to 2^63 on 64-bit platforms, and a Length of exactly that
+	// value would overflow the conversion to int below. The result of such
+	// a conversion is implementation-dependent, and a negative size would
+	// get past the bounds check and make the slice expression panic.
+	if !length.IsUint() || length.Number >= math.MaxInt {
+		return newError("stream Length not an unsigned integer")
+	}
+
+	// Expect exactly one newline.
+	if nl, err := lex.Next(); err != nil {
+		return nl, err
+	} else if nl.Kind != NL {
+		return newError("stream does not start with a newline")
+	}
+
+	size := int(length.Number)
+	if len(lex.P) < size {
+		return newError("stream is longer than the document")
+	}
+
+	// Limit the capacity of the view, so that appending to the stream data
+	// cannot overwrite the part of the document that follows it.
+	dict.Kind = Stream
+	dict.Stream = lex.P[:size:size]
+	lex.P = lex.P[size:]
+
+	// Skip any number of trailing newlines or comments.
+	if end, err := u.parse(lex, stack); err != nil {
+		return end, err
+	} else if end.Kind != Keyword || end.String != "endstream" {
+		return newError("improperly terminated stream")
+	}
+	return dict, nil
+}
+
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
lenStack := len(*stack)
if lenStack < 2 {
@@ -590,15 +644,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
}
return NewDict(dict), nil
case Keyword:
- // Appears in the document body, typically needs
- // to access the cross-reference table.
- //
- // TODO(p): Use the xref to read /Length etc. once we
- // actually need to read such objects; presumably
- // streams can use the Object.String member.
switch token.String {
case "stream":
- return newError("streams are not supported yet")
+ // Appears in the document body,
+ // typically needs to access the cross-reference table.
+ return u.parseStream(lex, stack)
case "obj":
return u.parseIndirect(lex, stack)
case "R":