Skip to content

Commit

Permalink
Merge pull request #269 from JULIELab/heading_bug
Browse files Browse the repository at this point in the history
Heading bug
  • Loading branch information
khituras authored Jul 3, 2024
2 parents 1f00d79 + 9fdcfe0 commit 48e0dfa
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public List<Document> createDocuments(JCas jCas) throws FieldGenerationException
Multimap<Sentence, Document> sentence2relDocs = HashMultimap.create();
String docId = getDocumentId(jCas);
Map<FlattenedRelation, Collection<Zone>> zoneIndex = JCasUtil.indexCovering(jCas, FlattenedRelation.class, Zone.class);
Map<Caption, Collection<Title>> captionTitleIndex = JCasUtil.indexCovered(jCas, Caption.class, Title.class);
Map<FlattenedRelation, Collection<Sentence>> sentIndex = JCasUtil.indexCovering(jCas, FlattenedRelation.class, Sentence.class);
try {
int i = 0;
Expand All @@ -72,7 +73,7 @@ public List<Document> createDocuments(JCas jCas) throws FieldGenerationException
Sentence overlappingSentence = overlappingSentences.stream().findAny().get();
sentenceDocument = createSentenceDocument(jCas, docId, i, overlappingSentence, argPair, rel);
// Likewise for the paragraph-like annotation that contains the relation
Document paragraphDocument = createParagraphDocument(jCas, docId, rel, argPair, zoneIndex);
Document paragraphDocument = createParagraphDocument(jCas, docId, rel, argPair, zoneIndex, captionTitleIndex);

// skip events extracted from PMC abstracts when there exists a corresponding PubMed document
if (paragraphDocument.containsKey("textscope") && paragraphDocument.get("textscope").toString().equals("abstract") && relDoc.get("source").toString().equals("pmc") && relDoc.containsKey("pmid")) {
Expand Down Expand Up @@ -337,7 +338,7 @@ private String getDocumentId(JCas jCas) {
return docId;
}

private Document createParagraphDocument(JCas jCas, String docId, FlattenedRelation rel, FeatureStructure[] argPair, Map<FlattenedRelation, Collection<Zone>> zoneIndex) throws CASException, FieldGenerationException {
private Document createParagraphDocument(JCas jCas, String docId, FlattenedRelation rel, FeatureStructure[] argPair, Map<FlattenedRelation, Collection<Zone>> zoneIndex, Map<Caption, Collection<Title>> captionTitleIndex) throws CASException, FieldGenerationException {
List<Zone> zonesAscending = zoneIndex.get(rel).stream().sorted(Comparator.comparingInt(z -> z.getEnd() - z.getBegin())).collect(Collectors.toList());
ArrayFieldValue zoneHeadings = new ArrayFieldValue();
IFieldValue textScope = null;
Expand Down Expand Up @@ -373,8 +374,12 @@ else if (z instanceof Caption)
if (textScope == null)
textScope = new RawToken("body");
} else if (z instanceof Caption) {
zoneHeadings.add(new RawToken(removeSectionNumbering(z.getCoveredText())));
textScope = new RawToken(((Caption) z).getCaptionType());
// Try to find Titles covered by this Caption (the index is built with indexCovered). If there is one, it should be the Caption heading.
final Optional<Title> titleOpt = captionTitleIndex.get(z).stream().findAny();
if (titleOpt.isPresent()) {
zoneHeadings.add(new RawToken(removeSectionNumbering(titleOpt.get().getCoveredText())));
textScope = new RawToken(((Caption) z).getCaptionType());
}
}
}
// If we couldn't find one of the specified structures, use the smallest one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@

<value>

<string>gepi.reldocs</string>
<string>gepi._documents_mirror</string>

</value>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@

<value>

<string>gepi_1.0_2</string>
<string>gepi_1.0_3</string>

</value>

Expand Down

0 comments on commit 48e0dfa

Please sign in to comment.