Skip to content

Commit

Permalink
Add a new Exception type to indicate inconsistent data.
Browse files Browse the repository at this point in the history
In rare cases, the text format includes line breaks or other elements that break GNormPlus processing. This results in unrecoverable errors. The new Exception type indicates such cases so that the calling code can react accordingly.
  • Loading branch information
khituras committed May 20, 2024
1 parent a3ae317 commit c39bf5a
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>de.julielab</groupId>
<artifactId>julielab-gnormplus</artifactId>
<packaging>jar</packaging>
<version>1.0.1</version>
<version>1.0.2</version>
<name>JULIE Lab GNormPlus</name>
<url>https://www.ncbi.nlm.nih.gov/research/bionlp/Tools/gnormplus/</url>

Expand Down
46 changes: 43 additions & 3 deletions src/GNormPluslib/GNR.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@

import java.io.*;
import java.util.*;
import java.util.concurrent.Exchanger;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;

import com.ctc.wstx.io.WstxInputSource;
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.englishStemmer;

Expand Down Expand Up @@ -170,8 +173,45 @@ public void LoadInputFile(String Filename,String FilenameAbb,String TrainTest) t
cmd ="./Ab3P "+FilenameAbb+" "+FilenameAbb+".out";
//cmd ="java -jar bioadi.jar "+FilenameAbb+" > "+FilenameAbb+".out";
}

Process process = runtime.exec(cmd);

// We read the command's output in its own thread. Then the calling thread can use process.waitFor() with a timeout.
// We do this because in rare cases, the Ab3P program seems to run forever.
final String finalCmd = cmd;
final Process process = runtime.exec(finalCmd);
Thread t = new Thread("GNP Ab3P Runner") {
    /**
     * Copies the standard output of {@code process} line by line to {@code fr},
     * flushing after every line.
     *
     * The process output is decoded as UTF-8. The reader chain is closed by
     * try-with-resources and {@code fr} is closed in a {@code finally} block, so
     * neither is leaked when reading or writing fails part-way through.
     * I/O errors are reported on stderr and are not propagated.
     *
     * NOTE(review): {@code fr}, {@code process} and {@code finalCmd} are captured
     * from the enclosing method; {@code fr} is declared outside the visible
     * section of the file - confirm that nothing else uses it after this thread ends.
     */
    @Override
    public void run() {
        super.run();
        System.out.println("Starting to find abbreviations with command " + finalCmd);
        // Closing the BufferedReader also closes the wrapped InputStreamReader and the process stream.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                fr.write(line);
                fr.newLine();
                // Flush per line so that already-read output is handed on even if the command hangs later.
                fr.flush();
            }
        } catch (IOException e) {
            System.err.println("Error in Thread to run cmd " + finalCmd);
            e.printStackTrace();
        } finally {
            // Always release the writer, including after a failed read or write.
            try {
                fr.close();
            } catch (IOException e) {
                System.err.println("Error in Thread to run cmd " + finalCmd);
                e.printStackTrace();
            }
        }
    }
};
t.start();
try {
process.waitFor(10, TimeUnit.MINUTES);
} catch (InterruptedException e) {
System.err.println("Command " + finalCmd + " was interrupted because it took too long.");
}
/*Process process = runtime.exec(cmd);
InputStream is = process.getInputStream();
InputStreamReader isr = new InputStreamReader(is, "UTF-8");
BufferedReader br = new BufferedReader(isr);
Expand All @@ -185,7 +225,7 @@ public void LoadInputFile(String Filename,String FilenameAbb,String TrainTest) t
is.close();
isr.close();
br.close();
fr.close();
fr.close();*/
//Abb output -> Hash
BufferedReader inputfile = new BufferedReader(new InputStreamReader(new FileInputStream(FilenameAbb+".out"), "UTF-8"));
line="";
Expand Down
32 changes: 32 additions & 0 deletions src/GNormPluslib/InconsistentDataException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package GNormPluslib;

/**
 * Unchecked exception that indicates inconsistent data.
 *
 * Besides the usual message and cause, an instance can carry the ID of the
 * document in which the inconsistency was found, so that calling code can
 * react to the affected document.
 */
public class InconsistentDataException extends RuntimeException {

    /** ID of the affected document; {@code null} until {@link #setDocId(String)} is called. */
    private String docId;

    /** Creates an exception without message, cause or document ID. */
    public InconsistentDataException() {
        super();
    }

    /**
     * @param message the detail message
     */
    public InconsistentDataException(String message) {
        super(message);
    }

    /**
     * @param cause the underlying cause
     */
    public InconsistentDataException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message the detail message
     * @param cause   the underlying cause
     */
    public InconsistentDataException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message            the detail message
     * @param cause              the underlying cause
     * @param enableSuppression  passed through to {@link RuntimeException}
     * @param writableStackTrace passed through to {@link RuntimeException}
     */
    public InconsistentDataException(
            String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }

    /**
     * @return the document ID stored on this exception, or {@code null} if none was set
     */
    public String getDocId() {
        return this.docId;
    }

    /**
     * @param docId the ID of the document the inconsistency belongs to
     */
    public void setDocId(String docId) {
        this.docId = docId;
    }
}
7 changes: 5 additions & 2 deletions src/GNormPluslib/SR.java
Original file line number Diff line number Diff line change
Expand Up @@ -816,8 +816,11 @@ public void SpeciesAssignment(String Filename,String FilenameBioC) throws IOExce
String G_mentions = anno[2];
String G_type = anno[3];
String G_mention_list[]=G_mentions.split("\\|");
if (G_mention_list.length == 0)
throw new IllegalStateException("There is no gene mention but at least one was expected in document with ID " + data.getBioCDocobj().PMIDs.get(i) + " in paragraph with offset " + data.getBioCDocobj().PassageOffsets.get(i).get(j) + " and length " + PassageContext.length() + " beginning with " + PassageContext.substring(0, Math.min(PassageContext.length(), 80)));
if (G_mention_list.length == 0) {
InconsistentDataException e = new InconsistentDataException("There is no gene mention but at least one was expected in document with ID " + data.getBioCDocobj().PMIDs.get(i) + " in paragraph with offset " + data.getBioCDocobj().PassageOffsets.get(i).get(j) + " and length " + PassageContext.length() + " beginning with " + PassageContext.substring(0, Math.min(PassageContext.length(), 80)));
e.setDocId(data.getBioCDocobj().PMIDs.get(i));
throw e;
}
String G_mention=G_mention_list[0]; // only use the first term to detect species ; should be updated after SimConcept

/** 1. prefix */
Expand Down

0 comments on commit c39bf5a

Please sign in to comment.