Skip to content

Commit

Permalink
Release version 1.0.0.
Browse files Browse the repository at this point in the history
  • Loading branch information
khituras committed Nov 16, 2022
1 parent 4dfd8ee commit 9f309b4
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ GNormPlus has been developed at the NLM and is described in [1] and can be downl

## Code changes to output FamilyNames

There are two places in the code marked with a comment containing "Erik Faessler". The added conditions lead to the output of the FamilyName entities. Those entities do not receive an ID from NCBI Gene.
There are a few places in the code marked with a comment containing "Erik Faessler". Two of those changes lead to the output of the FamilyName entities. Those entities do not receive an ID from NCBI Gene.

## Refactoring to allow multi-threaded processing

Expand Down
3 changes: 3 additions & 0 deletions convertBioCDocClass.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#!/usr/bin/env python3
"""
This script was used to change the code of the BioCDoc class.
TODO: the original description breaks off after "in order to use the"; state the purpose of the change here.
"""
import re

def normalize(s):
Expand Down
17 changes: 15 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>de.julielab</groupId>
<artifactId>julielab-gnormplus</artifactId>
<packaging>jar</packaging>
<version>1.0.0-SNAPSHOT</version>
<version>1.0.0</version>
<name>JULIE Lab GNormPlus</name>
<url>https://www.ncbi.nlm.nih.gov/research/bionlp/Tools/gnormplus/</url>

Expand All @@ -16,7 +16,6 @@
</parent>



<dependencies>
<dependency>
<groupId>com.pengyifan.bioc</groupId>
Expand Down Expand Up @@ -60,4 +59,18 @@
</plugin>
</plugins>
</build>
<developers>
<developer>
<name>Erik Faessler</name>
<url>https://julielab.de/Staff/Faessler/</url>
<email>[email protected]</email>
<organization>JULIE Lab Jena, Germany</organization>
<organizationUrl>https://julielab.de/</organizationUrl>
</developer>
</developers>
<scm>
<url>https://github.com/JULIELab/gnormplus</url>
<connection>scm:git:https://github.com/JULIELab/gnormplus</connection>
<developerConnection>scm:git:https://github.com/JULIELab/gnormplus</developerConnection>
</scm>
</project>
2 changes: 2 additions & 0 deletions src/GNormPluslib/BioCDoc.java
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,8 @@ public void BioCOutput(String input, String output, ArrayList<ArrayList<ArrayLis
Anno[4] = Anno[5];
}
}
if (Anno.length < 4)
throw new IllegalStateException("Document with ID " + PMID + " has annotation \"" + Arrays.toString(Anno) + "\" which is too short: A minimal length of 4 is expected.");
String type = Anno[3];
if (type.equals("GeneID")) {
type = "Gene";
Expand Down
46 changes: 28 additions & 18 deletions src/GNormPluslib/SR.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,24 +68,28 @@ public void SpeciesRecognition(String Filename, String FilenameBioC, String Stra
// For anti-serum filtering
String ForwardSTR="";
String BackwardSTR="";
if(start>21)
{
ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(start-21,last);
}
else
{
ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(0,last);
}
if(PassageContext.length()>last+21)
{
BackwardSTR = PassageContext.substring(start,last+21);
}
else
{
BackwardSTR = PassageContext.substring(start,PassageContext.length());
}

String mention = anno[2];
try {
if(start>21)
{
ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(start-21,last);
}
else
{
ForwardSTR = (PassageContext+"ZZZZZZZZZZZZZZZZZZZZZZZZZZZ").substring(0,last);
}
if(PassageContext.length()>last+21)
{
BackwardSTR = PassageContext.substring(start,last+21);
}
else
{
BackwardSTR = PassageContext.substring(start,PassageContext.length());
}
} catch (Exception e) {
throw new RuntimeException("Exception in document " + Pmid + " in paragraph with offset " + data.getBioCDocobj().PassageOffsets.get(i).get(j) + " and length " + PassageContext.length() + " beginning with " + PassageContext.substring(0, Math.min(PassageContext.length(), 80)), e);
}

String mention = anno[2];
String id = anno[3];
String mention_tmp=mention.toLowerCase();
mention_tmp = mention_tmp.replaceAll("([^A-Za-z0-9@ ])", "\\\\$1");
Expand Down Expand Up @@ -166,6 +170,10 @@ else if(!id.equals("NA"))
String anno[]=locations.get(k).split("\t");
int start= Integer.parseInt(anno[0]);
int last= Integer.parseInt(anno[1]);
if (last > PassageContext.length()) {
// Erik Faessler: We had offset issues with texts that contain non-ASCII characters
continue;
}
String mention = anno[2];
String id = anno[3];
if(data.getBioCDocobj().Annotations.size()>i && data.getBioCDocobj().Annotations.get(i).size()>j)
Expand Down Expand Up @@ -804,6 +812,8 @@ public void SpeciesAssignment(String Filename,String FilenameBioC) throws IOExce
String G_mentions = anno[2];
String G_type = anno[3];
String G_mention_list[]=G_mentions.split("\\|");
if (G_mention_list.length == 0)
throw new IllegalStateException("There is no gene mention but at least one was expected in document with ID " + data.getBioCDocobj().PMIDs.get(i) + " in paragraph with offset " + data.getBioCDocobj().PassageOffsets.get(i).get(j) + " and length " + PassageContext.length() + " beginning with " + PassageContext.substring(0, Math.min(PassageContext.length(), 80)));
String G_mention=G_mention_list[0]; // only use the first term to detect species ; should be updated after SimConcept

/** 1. prefix */
Expand Down

0 comments on commit 9f309b4

Please sign in to comment.