Skip to content

Commit

Permalink
Page.java: TextExtractor.java: Add page number to XML output.
Browse files Browse the repository at this point in the history
  • Loading branch information
kjw committed Jun 10, 2010
1 parent 53ee8ee commit bc595f6
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
10 changes: 6 additions & 4 deletions src/org/crossref/pdf2xml/TextExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,20 @@
*/
public class TextExtractor extends PDFTextStripper {

private ArrayList<Page> previousPages;
private ArrayList<Page> previousPages = new ArrayList<Page>();

private Page currentPage;
private Page currentPage = null;

private int pageCount = 0;

/**
 * Creates a text extractor whose {@code previousPages} list starts out empty.
 *
 * @throws IOException declared on this constructor; presumably raised by the
 *         PDFTextStripper superclass constructor - confirm against PDFBox.
 */
public TextExtractor() throws IOException {
    super();
    this.previousPages = new ArrayList<Page>();
}

@Override
public void processStream(PDPage aPage, PDResources resources,
COSStream cosStream) throws IOException {
currentPage = new Page(aPage.findCropBox());
currentPage = new Page(aPage.findCropBox(), ++pageCount);

super.processStream(aPage, resources, cosStream);
coalesceRows(currentPage);
Expand Down Expand Up @@ -172,6 +173,7 @@ public String toXml() {
pageEle.setAttribute("left", String.valueOf(cb.getLowerLeftX()));
pageEle.setAttribute("width", String.valueOf(cb.getWidth()));
pageEle.setAttribute("height", String.valueOf(cb.getHeight()));
pageEle.setAttribute("number", String.valueOf(page.getNumber()));
pdf2xml.appendChild(pageEle);

for (Text t : page.getText()) {
Expand Down
8 changes: 7 additions & 1 deletion src/org/crossref/pdf2xml/data/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ public class Page {
private ArrayList<Text> texts;
private HashMap<Float, ArrayList<Text>> yPosMap;
private PDRectangle clipBox;
private int number;

public Page(PDRectangle newClipBox) {
/**
 * Creates a page with no text entries and an empty y-position map,
 * remembering the supplied clip box and page number.
 *
 * @param newClipBox rectangle stored as this page's clip box
 * @param newNumber  number stored for this page
 */
public Page(PDRectangle newClipBox, int newNumber) {
    this.clipBox = newClipBox;
    this.number = newNumber;
    this.texts = new ArrayList<Text>();
    this.yPosMap = new HashMap<Float, ArrayList<Text>>();
}

public void addText(Text t) {
Expand Down Expand Up @@ -49,6 +51,10 @@ public List<Text> getText() {
return texts;
}

/**
 * Returns the number that was given to this page when it was constructed.
 *
 * @return the stored page number
 */
public int getNumber() {
    return this.number;
}

public List<Text> getTextAtY(float y) {
Float fObj = new Float(y);
if (yPosMap.containsKey(fObj)) {
Expand Down

0 comments on commit bc595f6

Please sign in to comment.