diff --git a/pdfbox/pom.xml b/pdfbox/pom.xml index 112d70221b8..81a978dda02 100644 --- a/pdfbox/pom.xml +++ b/pdfbox/pom.xml @@ -697,6 +697,19 @@ 5ae7f232c47c13ed31997eb2c368e7deb1013c1321d70bf79369f8d709b33406191d94c21a5d27b4c4bb48241bafd9328a0a6d2d093d4e540d5044e9503bd099 + + PDFBOX-5026 + generate-test-resources + + wget + + + https://issues.apache.org/jira/secure/attachment/13015945/issue9418.pdf + ${project.build.directory}/pdfs + PDFBOX-5026.pdf + 1e47caa4246752bc392e596803cb95556bde578b687352a7434d9b77f92892539f810f046e76e4b2300d859ab9a8755a4212378ae099a825367f154919524d7c + + diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java index 26f7d8af53d..2e3732cb699 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java @@ -272,7 +272,7 @@ protected COSDictionary retrieveTrailer() throws IOException } } // check if the trailer contains a Root object - if (trailer != null && trailer.getItem(COSName.ROOT) == null) + if (trailer != null && !isValidTrailer(trailer)) { rebuildTrailer = isLenient(); } @@ -292,6 +292,30 @@ protected COSDictionary retrieveTrailer() throws IOException return trailer; } + /** + * Check that the trailer contains a Root object which parses to a + * dictionary that in turn contains a Pages object. + * + * @param trailer the trailer dictionary to validate + * @return true if the Root and its Pages object are both found, false otherwise. + * @throws IOException if parsing the Root object fails + */ + private boolean isValidTrailer(COSDictionary trailer) throws IOException + { + COSObject root = trailer.getCOSObject(COSName.ROOT); + if (root == null) + { + return false; + } + COSBase base = parseObjectDynamically(root, false); + if (!(base instanceof COSDictionary)) + { + return false; + } + COSDictionary rootDict = (COSDictionary) base; + return rootDict.getCOSObject(COSName.PAGES) != null; + } + /** * Parses cross reference tables. 
* diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java index 836963f016f..20286bee73d 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java @@ -25,7 +25,6 @@ import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; @@ -164,14 +163,14 @@ public PDDocument getPDDocument() throws IOException * The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset) * to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref * at the beginning of the file. Last the root object is parsed. - * + * * @throws InvalidPasswordException If the password is incorrect. * @throws IOException If something went wrong. */ protected void initialParse() throws IOException { COSDictionary trailer = retrieveTrailer(); - + COSBase base = parseTrailerValuesDynamically(trailer); if (!(base instanceof COSDictionary)) { diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java index 9617a912ec6..0f13c5a078c 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java @@ -363,6 +363,19 @@ public void testPDFBox4490() throws IOException doc.close(); } + /** + * Test that the PDFBOX-5026 sample file loads with exactly one page. 
+ * + * @throws IOException if loading or closing the document fails + */ + @Test + public void testPDFBox5026() throws IOException + { + PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-5026.pdf")); + assertEquals(1, doc.getNumberOfPages()); + doc.close(); + } + private void executeParserTest(RandomAccessRead source, MemoryUsageSetting memUsageSetting) throws IOException { ScratchFile scratchFile = new ScratchFile(memUsageSetting);