Skip to content

Commit

Permalink
Merge branch 'pdfminer'
Browse files Browse the repository at this point in the history
  • Loading branch information
rschroll committed Jan 9, 2014
2 parents e46b06f + 070f404 commit 2fde569
Showing 1 changed file with 32 additions and 6 deletions.
38 changes: 32 additions & 6 deletions prsannots/pagetext.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,39 @@
# This file is part of prsannots and is distributed under the terms of
# the LGPL license. See the file COPYING for full details.

from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.layout import LAParams, LTAnon, LTTextBox
from pdfminer.layout import LAParams, LTTextBox
from pdfminer.converter import PDFPageAggregator

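# pdfminer changed its API between releases: PDFDocument moved to another
# module and documents/pages are created differently.  Support both variants.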
try:
    # Only the import can fail, so keep it alone in the ``try`` body.
    from pdfminer.pdfparser import PDFDocument
except ImportError:
    # PDFDocument is not importable from pdfminer.pdfparser: use the API in
    # which it lives in pdfminer.pdfdocument and pages come from PDFPage.
    from pdfminer.pdfdocument import PDFDocument
    from pdfminer.pdfpage import PDFPage

    def new_doc(parser):
        """Return a PDFDocument built directly from *parser*."""
        return PDFDocument(parser)

    def get_pages(doc):
        """Return the pages of *doc* as produced by PDFPage.create_pages()."""
        return PDFPage.create_pages(doc)

else:
    # PDFDocument came from pdfminer.pdfparser: the document and the parser
    # have to be linked to each other by hand.

    def new_doc(parser):
        """Return a new PDFDocument wired to *parser* (and vice versa)."""
        document = PDFDocument()
        parser.set_document(document)
        document.set_parser(parser)
        return document

    def get_pages(doc):
        """Return the pages of *doc* via its own get_pages() method."""
        return doc.get_pages()

# The layout class is importable as either LTAnon or LTAnno depending on the
# installed pdfminer; expose it under the single name LTAnon either way.
try:
    from pdfminer.layout import LTAnon
except ImportError:
    from pdfminer.layout import LTAnno as LTAnon


LIGATURES = {u"\ufb00": "ff",
u"\ufb01": "fi",
Expand All @@ -27,9 +55,7 @@ def get_layouts(fd):
"""From an open PDF file, get the page layouts (of type pdfminer.layout.LTPage)."""

parser = PDFParser(fd)
doc = PDFDocument()
parser.set_document(doc)
doc.set_parser(parser)
doc = new_doc(parser)
doc.initialize()

laparams = LAParams()
Expand All @@ -38,7 +64,7 @@ def get_layouts(fd):
interpreter = PDFPageInterpreter(rsrcmgr, device)

layouts = []
for page in doc.get_pages():
for page in get_pages(doc):
interpreter.process_page(page)
layouts.append(device.get_result())
return layouts
Expand Down

0 comments on commit 2fde569

Please sign in to comment.