Skip to content

Rebuild the trailer when missing pages item #92

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: 2.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pdfbox/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,19 @@
<sha512>5ae7f232c47c13ed31997eb2c368e7deb1013c1321d70bf79369f8d709b33406191d94c21a5d27b4c4bb48241bafd9328a0a6d2d093d4e540d5044e9503bd099</sha512>
</configuration>
</execution>
<!--
Test resource for PDFBOX-5026: runs the wget goal in the generate-test-resources
phase to fetch issue9418.pdf from the JIRA attachment URL and store it as
PDFBOX-5026.pdf under ${project.build.directory}/pdfs.
NOTE(review): the sha512 value is presumably checked by the download plugin
against the fetched file - confirm against the plugin documentation.
-->
<execution>
<id>PDFBOX-5026</id>
<phase>generate-test-resources</phase>
<goals>
<goal>wget</goal>
</goals>
<configuration>
<url>https://issues.apache.org/jira/secure/attachment/13015945/issue9418.pdf</url>
<outputDirectory>${project.build.directory}/pdfs</outputDirectory>
<outputFileName>PDFBOX-5026.pdf</outputFileName>
<sha512>1e47caa4246752bc392e596803cb95556bde578b687352a7434d9b77f92892539f810f046e76e4b2300d859ab9a8755a4212378ae099a825367f154919524d7c</sha512>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
Expand Down
26 changes: 25 additions & 1 deletion pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ protected COSDictionary retrieveTrailer() throws IOException
}
}
// check if the trailer contains a Root object
if (trailer != null && trailer.getItem(COSName.ROOT) == null)
if (trailer != null && !isValidTrailer(trailer))
{
rebuildTrailer = isLenient();
}
Expand All @@ -292,6 +292,30 @@ protected COSDictionary retrieveTrailer() throws IOException
return trailer;
}

/**
 * Decides whether the given trailer is usable as-is.
 * <p>
 * The trailer is accepted only when all of the following hold: the trailer
 * yields a non-null {@link COSObject} for {@link COSName#ROOT}, parsing that
 * object produces a {@link COSDictionary}, and that dictionary yields a
 * non-null {@link COSObject} for {@link COSName#PAGES}.
 *
 * @param trailer the trailer dictionary to inspect
 * @return {@code true} if the Root and its Pages entry are both present,
 *         {@code false} otherwise
 * @throws IOException if parsing the Root object fails
 */
private boolean isValidTrailer(COSDictionary trailer) throws IOException
{
    COSObject rootReference = trailer.getCOSObject(COSName.ROOT);
    if (rootReference != null)
    {
        // the Root has to be parsed before its entries can be inspected
        COSBase parsedRoot = parseObjectDynamically(rootReference, false);
        if (parsedRoot instanceof COSDictionary)
        {
            COSDictionary catalog = (COSDictionary) parsedRoot;
            return catalog.getCOSObject(COSName.PAGES) != null;
        }
    }
    // no Root entry, or the Root did not resolve to a dictionary
    return false;
}

/**
* Parses cross reference tables.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessRead;
Expand Down Expand Up @@ -164,14 +163,14 @@ public PDDocument getPDDocument() throws IOException
* The initial parse will first parse only the trailer, the xrefstart and all xref tables to have a pointer (offset)
* to all the pdf's objects. It can handle linearized pdfs, which will have an xref at the end pointing to an xref
* at the beginning of the file. Last the root object is parsed.
*
*
* @throws InvalidPasswordException If the password is incorrect.
* @throws IOException If something went wrong.
*/
protected void initialParse() throws IOException
{
COSDictionary trailer = retrieveTrailer();

COSBase base = parseTrailerValuesDynamically(trailer);
if (!(base instanceof COSDictionary))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,19 @@ public void testPDFBox4490() throws IOException
doc.close();
}

/**
 * Test that the PDFBOX-5026 file can be loaded and reports exactly one page.
 *
 * @throws IOException if the document cannot be loaded or closed
 */
@Test
public void testPDFBox5026() throws IOException
{
    PDDocument doc = PDDocument.load(new File(TARGETPDFDIR, "PDFBOX-5026.pdf"));
    try
    {
        assertEquals(1, doc.getNumberOfPages());
    }
    finally
    {
        // close even when the assertion fails, so the document is never left open
        doc.close();
    }
}

private void executeParserTest(RandomAccessRead source, MemoryUsageSetting memUsageSetting) throws IOException
{
ScratchFile scratchFile = new ScratchFile(memUsageSetting);
Expand Down