From 3f3bbea7fa58cce8f96e26523e3b8172c1b5b109 Mon Sep 17 00:00:00 2001 From: Robert Schroll Date: Thu, 9 Jan 2014 15:10:19 -0500 Subject: [PATCH 1/2] Use new pdfminer API if old one isn't available --- prsannots/pagetext.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/prsannots/pagetext.py b/prsannots/pagetext.py index 130145c..66c9fc4 100644 --- a/prsannots/pagetext.py +++ b/prsannots/pagetext.py @@ -3,11 +3,39 @@ # This file is part of prsannots and is distributed under the terms of # the LGPL license. See the file COPYING for full details. -from pdfminer.pdfparser import PDFParser, PDFDocument +from pdfminer.pdfparser import PDFParser from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter from pdfminer.layout import LAParams, LTAnon, LTTextBox from pdfminer.converter import PDFPageAggregator +# pdfminer suddenly decided to change its API... +try: + from pdfminer.pdfparser import PDFDocument + + def new_doc(parser): + doc = PDFDocument() + parser.set_document(doc) + doc.set_parser(parser) + return doc + + def get_pages(doc): + return doc.get_pages() + +except ImportError: + from pdfminer.pdfdocument import PDFDocument + from pdfminer.pdfpage import PDFPage + + def new_doc(parser): + return PDFDocument(parser) + + def get_pages(doc): + return PDFPage.create_pages(doc) + +try: + from pdfminer.layout import LTAnon +except ImportError: + from pdfminer.layout import LTAnno as LTAnon + LIGATURES = {u"\ufb00": "ff", u"\ufb01": "fi", @@ -27,9 +55,7 @@ def get_layouts(fd): """From an open PDF file, get the page layouts (of type pdfminer.layout.LTPage).""" parser = PDFParser(fd) - doc = PDFDocument() - parser.set_document(doc) - doc.set_parser(parser) + doc = new_doc(parser) doc.initialize() laparams = LAParams() @@ -38,7 +64,7 @@ def get_layouts(fd): interpreter = PDFPageInterpreter(rsrcmgr, device) layouts = [] - for page in doc.get_pages(): + for page in get_pages(doc): interpreter.process_page(page) layouts.append(device.get_result()) return layouts From 070f404a15eca87e7b8f497dd81a8778aef4e364 Mon Sep 17 00:00:00 2001 From: Robert Schroll Date: Thu, 9 Jan 2014 16:48:05 -0500 Subject: [PATCH 2/2] Remove all bits of old API from outside of try blocks Thanks to Kris Thielemans for troubleshooting. Closes #13. --- prsannots/pagetext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prsannots/pagetext.py b/prsannots/pagetext.py index 66c9fc4..0f02753 100644 --- a/prsannots/pagetext.py +++ b/prsannots/pagetext.py @@ -5,7 +5,7 @@ from pdfminer.pdfparser import PDFParser from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter -from pdfminer.layout import LAParams, LTAnon, LTTextBox +from pdfminer.layout import LAParams, LTTextBox from pdfminer.converter import PDFPageAggregator # pdfminer suddenly decided to change its API...