From 2d08100b58b6c7e06f124aef3e2761bcdaeac85b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?=
Date: Fri, 4 Sep 2020 17:16:42 +0200
Subject: Avoid downgrading the document's PDF version

---
 README.adoc         |  2 --
 pdf-simple-sign.cpp | 37 +++++++++++++++++++++++++++++++------
 pdf/pdf.go          | 40 +++++++++++++++++++++++++++++++---------
 3 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/README.adoc b/README.adoc
index 46bc9ed..6e7acd3 100644
--- a/README.adoc
+++ b/README.adoc
@@ -9,8 +9,6 @@ the Cairo library. As such, it currently comes with some restrictions:
    overwritten
  * the document may not employ cross-reference streams, or must constitute a
    hybrid-reference file at least
- * the document may not be newer than PDF 1.6 already, or it will get downgraded
-   to that version
  * the signature may take at most 4 kilobytes as a compile-time limit, which
    should be enough space even for one intermediate certificate
 
diff --git a/pdf-simple-sign.cpp b/pdf-simple-sign.cpp
index b724f69..6d01211 100644
--- a/pdf-simple-sign.cpp
+++ b/pdf-simple-sign.cpp
@@ -342,6 +342,9 @@ public:
 
   /// Build the cross-reference table and prepare a new trailer dictionary
   std::string initialize();
+  /// Try to extract the claimed PDF version as a positive decimal number, e.g. 17 for PDF 1.7.
+  /// Returns zero on failure.
+  int version(const pdf_object& root) const;
   /// Retrieve an object by its number and generation -- may return NIL or END with an error
   pdf_object get(uint n, uint generation) const;
   /// Allocate a new object number
@@ -512,7 +515,7 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set& loaded_entrie
 
 std::string pdf_updater::initialize() {
   // We only need to look for startxref roughly within the last kibibyte of the document
-  static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
+  static std::regex haystack_re(R"([\s\S]*\sstartxref\s+(\d+)\s+%%EOF)");
   std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
 
   std::smatch m;
@@ -560,6 +563,30 @@ std::string pdf_updater::initialize() {
   return "";
 }
 
+int pdf_updater::version(const pdf_object& root) const {
+  // A well-formed Version name in the root dictionary is preferred over the header comment
+  auto version = root.dict.find("Version");
+  if (version != root.dict.end() && version->second.type == pdf_object::NAME) {
+    // Accept exactly "M.N"; plain comparisons avoid isdigit(), which has undefined behaviour
+    // for the negative values that a char may hold
+    const auto& v = version->second.string;
+    if (v.length() == 3 && v[1] == '.' &&
+        v[0] >= '0' && v[0] <= '9' && v[2] >= '0' && v[2] <= '9')
+      return (v[0] - '0') * 10 + (v[2] - '0');
+  }
+
+  // We only need to look for the comment roughly within the first kibibyte of the document
+  static const std::regex version_re(R"((?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n])");
+  std::string haystack = document.substr(0, 1024);
+
+  // Each capture group matches a single digit, so it can be converted directly
+  std::smatch m;
+  if (std::regex_search(haystack, m, version_re))
+    return (*m[1].first - '0') * 10 + (*m[2].first - '0');
+
+  return 0;
+}
+
 pdf_object pdf_updater::get(uint n, uint generation) const {
   if (n >= xref_size)
     return {pdf_object::NIL};
@@ -806,8 +833,6 @@ error:
 /// streams from PDF 1.5, or at least constitutes a hybrid-reference file. The results with
 /// PDF 2.0 (2017) are currently unknown as the standard costs money.
 ///
-/// Carelessly assumes that the version of the original document is at most PDF 1.6.
-///
 /// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
 /// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
 /// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
@@ -892,9 +917,9 @@ static std::string pdf_sign(std::string& document) {
   }};
 
   // Upgrade the document version for SHA-256 etc.
-  // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
-  // version than 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
-  root.dict["Version"] = {pdf_object::NAME, "1.6"};
+  if (pdf.version(root) < 16)
+    root.dict["Version"] = {pdf_object::NAME, "1.6"};
+
   pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
   pdf.flush_updates();
 
diff --git a/pdf/pdf.go b/pdf/pdf.go
index d9e4098..a6f1ae8 100644
--- a/pdf/pdf.go
+++ b/pdf/pdf.go
@@ -670,7 +670,7 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
 
 // -----------------------------------------------------------------------------
 
-var haystackRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
+var trailerRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
 
 // NewUpdater initializes an Updater, building the cross-reference table and
 // preparing a new trailer dictionary.
@@ -685,7 +685,7 @@ func NewUpdater(document []byte) (*Updater, error) {
 		haystack = haystack[len(haystack)-1024:]
 	}
 
-	m := haystackRE.FindSubmatch(haystack)
+	m := trailerRE.FindSubmatch(haystack)
 	if m == nil {
 		return nil, errors.New("cannot find startxref")
 	}
@@ -739,6 +739,31 @@ func NewUpdater(document []byte) (*Updater, error) {
 	return u, nil
 }
 
+var versionRE = regexp.MustCompile(
+	`(?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n]`)
+
+// Version extracts the claimed PDF version as a positive decimal number,
+// e.g. 17 for PDF 1.7. Returns zero on failure.
+func (u *Updater) Version(root *Object) int {
+	if version, ok := root.Dict["Version"]; ok && version.Kind == Name {
+		if v := version.String; len(v) == 3 && v[1] == '.' &&
+			v[0] >= '0' && v[0] <= '9' && v[2] >= '0' && v[2] <= '9' {
+			return int(v[0]-'0')*10 + int(v[2]-'0')
+		}
+	}
+
+	// We only need to look for the comment roughly within
+	// the first kibibyte of the document.
+	haystack := u.Document
+	if len(haystack) > 1024 {
+		haystack = haystack[:1024]
+	}
+	if m := versionRE.FindSubmatch(haystack); m != nil {
+		return int(m[1][0]-'0')*10 + int(m[2][0]-'0')
+	}
+	return 0
+}
+
 // Get retrieves an object by its number and generation--may return
 // Nil or End with an error.
 //
@@ -1094,9 +1119,6 @@ func FillInSignature(document []byte, signOff, signLen int,
 // employ cross-reference streams from PDF 1.5, or at least constitutes
 // a hybrid-reference file. The results with PDF 2.0 (2017) are currently
 // unknown as the standard costs money.
-//
-// Carelessly assumes that the version of the original document is at most
-// PDF 1.6.
 func Sign(document []byte,
 	key crypto.PrivateKey, certs []*x509.Certificate) ([]byte, error) {
 	pdf, err := NewUpdater(document)
@@ -1195,10 +1217,10 @@ func Sign(document []byte,
 	})
 
 	// Upgrade the document version for SHA-256 etc.
-	// XXX: Assuming that it's not newer than 1.6 already--while Cairo can't
-	// currently use a newer version that 1.5, it's not a bad idea to use
-	// cairo_pdf_surface_restrict_to_version().
-	root.Dict["Version"] = NewName("1.6")
+	if pdf.Version(&root) < 16 {
+		root.Dict["Version"] = NewName("1.6")
+	}
+
 	pdf.Update(rootRef.N, func(buf BytesWriter) {
 		buf.WriteString(root.Serialize())
 	})
-- 
cgit v1.2.3