forked from broadgsa/gatk
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4 348d0f76-0448-11de-a6fe-93d51630548a
- Loading branch information
depristo
committed
Feb 26, 2009
0 parents
commit 59083a4
Showing
217 changed files
with
33,051 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,242 @@ | ||
/* | ||
* The Broad Institute | ||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT | ||
* This software and its documentation are copyright 2008 by the | ||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. | ||
* | ||
* This software is supplied without any warranty or guaranteed support whatsoever. | ||
* Neither the Broad Institute nor MIT can be responsible for its use, misuse, | ||
* or functionality. | ||
*/ | ||
package edu.mit.broad.arachne; | ||
|
||
|
||
/** | ||
* This class represents an arachne LookAlign alignment (or other related data structures). | ||
*/ | ||
public class Alignment { | ||
|
||
private static final char TAB = '\t'; | ||
|
||
private int mASequenceId; | ||
private int mASequenceLength; | ||
private int mAStart; | ||
private int mAEnd; | ||
private int mBSequenceId; | ||
private int mBSequenceLength; | ||
private int mBStart; | ||
private int mBEnd; | ||
private char mOrientation; | ||
private int[] mAlignmentBlocks; | ||
|
||
|
||
public Alignment() { | ||
} | ||
|
||
public int getASequenceId() { | ||
return mASequenceId; | ||
} | ||
|
||
public void setASequenceId(int value) { | ||
mASequenceId = value; | ||
} | ||
|
||
public int getASequenceLength() { | ||
return mASequenceLength; | ||
} | ||
|
||
public void setASequenceLength(int value) { | ||
mASequenceLength = value; | ||
} | ||
|
||
public int getAStart() { | ||
return mAStart; | ||
} | ||
|
||
public void setAStart(int value) { | ||
mAStart = value; | ||
} | ||
|
||
public int getAEnd() { | ||
return mAEnd; | ||
} | ||
|
||
public void setAEnd(int value) { | ||
mAEnd = value; | ||
} | ||
|
||
public int getBSequenceId() { | ||
return mBSequenceId; | ||
} | ||
|
||
public void setBSequenceId(int value) { | ||
mBSequenceId = value; | ||
} | ||
|
||
public int getBSequenceLength() { | ||
return mBSequenceLength; | ||
} | ||
|
||
public void setBSequenceLength(int value) { | ||
mBSequenceLength = value; | ||
} | ||
|
||
public int getBStart() { | ||
return mBStart; | ||
} | ||
|
||
public void setBStart(int value) { | ||
mBStart = value; | ||
} | ||
|
||
public int getBEnd() { | ||
return mBEnd; | ||
} | ||
|
||
public void setBEnd(int value) { | ||
mBEnd = value; | ||
} | ||
|
||
public char getOrientation() { | ||
return mOrientation; | ||
} | ||
|
||
public void setOrientation(char value) { | ||
mOrientation = value; | ||
} | ||
|
||
public int[] getAlignmentBlocks() { | ||
return mAlignmentBlocks; | ||
} | ||
|
||
public void setAlignmentBlocks(int[] value) { | ||
mAlignmentBlocks = value; | ||
} | ||
|
||
public static Alignment parse(String text) { | ||
|
||
if (text == null) { | ||
return null; | ||
} | ||
|
||
String[] fields = text.trim().split("\t"); | ||
if (fields.length == 0) { | ||
return null; | ||
} | ||
|
||
if (!fields[0].equals("QUERY")) { | ||
throw new IllegalArgumentException("Invalid alignment: " + text); | ||
} | ||
if (fields.length < 14) { | ||
throw new IllegalArgumentException("Invalid alignment: " + text); | ||
} | ||
|
||
int seqAId = parseIntField(fields[1]); | ||
int seqAStart = parseIntField(fields[2]); | ||
int seqAEnd = parseIntField(fields[3]); | ||
int seqALength = parseIntField(fields[4]); | ||
int orientation = parseIntField(fields[5]); | ||
int seqBId = parseIntField(fields[6]); | ||
int seqBStart = parseIntField(fields[7]); | ||
int seqBEnd = parseIntField(fields[8]); | ||
int seqBLength = parseIntField(fields[9]); | ||
int blockCount = parseIntField(fields[10]); | ||
|
||
if (seqAStart < 0 || seqAEnd <= 0 || seqALength <= 0 || | ||
seqAStart >= seqALength || seqAEnd > seqALength || seqAStart >= seqAEnd) { | ||
throw new IllegalArgumentException("Invalid alignment: " + text); | ||
} | ||
if (seqBStart < 0 || seqBEnd <= 0 || seqBLength <= 0 || | ||
seqBStart >= seqBLength || seqBEnd > seqBLength || seqBStart >= seqBEnd) { | ||
throw new IllegalArgumentException("Invalid alignment: " + text); | ||
} | ||
if (orientation < 0 || orientation > 1) { | ||
throw new IllegalArgumentException("Invalid alignment: " + text); | ||
} | ||
if (fields.length != (11 + 3*blockCount)) { | ||
throw new IllegalArgumentException("Invalid alignment: " + text); | ||
} | ||
|
||
int[] alignmentBlocks = new int[3*blockCount]; | ||
for (int i = 0; i < 3*blockCount; i++) { | ||
alignmentBlocks[i] = parseIntField(fields[11 + i]); | ||
} | ||
|
||
Alignment alignment = new Alignment(); | ||
alignment.setASequenceId(seqAId); | ||
alignment.setASequenceLength(seqALength); | ||
alignment.setAStart(seqAStart+1); | ||
alignment.setAEnd(seqAEnd); | ||
alignment.setBSequenceId(seqBId); | ||
alignment.setBSequenceLength(seqBLength); | ||
alignment.setBStart(seqBStart+1); | ||
alignment.setBEnd(seqBEnd); | ||
alignment.setOrientation((orientation == 0) ? '+' : '-'); | ||
alignment.setAlignmentBlocks(alignmentBlocks); | ||
return alignment; | ||
} | ||
|
||
private static int parseIntField(String text) { | ||
try { | ||
return Integer.parseInt(text); | ||
} catch (NumberFormatException exc) { | ||
throw new IllegalArgumentException("Illegal alignment field: " + text); | ||
} | ||
} | ||
|
||
public String arachneFormat() { | ||
StringBuilder builder = new StringBuilder(); | ||
builder.append("QUERY"); | ||
builder.append(TAB); | ||
builder.append(mASequenceId); | ||
builder.append(TAB); | ||
builder.append(mAStart-1); // zero based | ||
builder.append(TAB); | ||
builder.append(mAEnd); | ||
builder.append(TAB); | ||
builder.append(mASequenceLength); | ||
builder.append(TAB); | ||
builder.append(mOrientation == '+' ? 0 : 1); | ||
builder.append(TAB); | ||
builder.append(mBSequenceId); | ||
builder.append(TAB); | ||
builder.append(mBStart-1); // zero based | ||
builder.append(TAB); | ||
builder.append(mBEnd); | ||
builder.append(TAB); | ||
builder.append(mBSequenceLength); | ||
builder.append(TAB); | ||
builder.append(mAlignmentBlocks.length / 3); | ||
for (int i = 0; i < mAlignmentBlocks.length; i++) { | ||
builder.append(TAB); | ||
builder.append(mAlignmentBlocks[i]); | ||
} | ||
return builder.toString(); | ||
} | ||
|
||
public String format() { | ||
StringBuilder builder = new StringBuilder(); | ||
builder.append("Alignment"); | ||
builder.append(' '); | ||
builder.append(mASequenceId); | ||
builder.append(' '); | ||
builder.append(mAStart); | ||
builder.append(' '); | ||
builder.append(mAEnd); | ||
builder.append(' '); | ||
builder.append(mOrientation); | ||
builder.append(' '); | ||
builder.append(mBSequenceId); | ||
builder.append(' '); | ||
builder.append(mBStart); | ||
builder.append(' '); | ||
builder.append(mBEnd); | ||
builder.append(' '); | ||
builder.append(mAlignmentBlocks.length / 3); | ||
for (int i = 0; i < mAlignmentBlocks.length; i++) { | ||
builder.append(' '); | ||
builder.append(mAlignmentBlocks[i]); | ||
} | ||
return builder.toString(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
/* | ||
* The Broad Institute | ||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT | ||
* This software and its documentation are copyright 2008 by the | ||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. | ||
* | ||
* This software is supplied without any warranty or guaranteed support whatsoever. | ||
* Neither the Broad Institute nor MIT can be responsible for its use, misuse, | ||
* or functionality. | ||
*/ | ||
package edu.mit.broad.arachne; | ||
|
||
import java.io.*; | ||
|
||
/** | ||
* Utility to convert fastb to fasta files. | ||
* More importantly, can be used to extract a subset of the reads. | ||
*/ | ||
public class Fastb2Fasta { | ||
|
||
private boolean mVerbose = false; | ||
private boolean mDebug = false; | ||
private String mInputPath = null; | ||
private String mIdListFilePath = null; | ||
|
||
|
||
public static void main(String[] args) | ||
throws Exception { | ||
new Fastb2Fasta().run(args); | ||
} | ||
|
||
private void usage() { | ||
System.out.println("Usage: Fastb2Fasta ... <fastb-file>"); | ||
System.out.println(" -idlist <file-of-read-ids>"); | ||
System.out.println(" -verbose"); | ||
System.out.println(" -debug"); | ||
} | ||
|
||
private boolean parseArguments(String[] args) { | ||
|
||
int argpos = 0; | ||
int argsleft = 0; | ||
|
||
while (argpos < args.length) { | ||
argsleft = args.length - argpos; | ||
String arg = args[argpos]; | ||
if (arg.equals("-idlist") && argsleft > 1) { | ||
argpos++; | ||
mIdListFilePath = args[argpos++]; | ||
} else if (arg.equals("-verbose")) { | ||
argpos++; | ||
mVerbose = true; | ||
} else if (arg.equals("-debug")) { | ||
argpos++; | ||
mDebug = true; | ||
} else if (arg.startsWith("-")) { | ||
usage(); | ||
return false; | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
argsleft = args.length - argpos; | ||
if (argsleft != 1) { | ||
usage(); | ||
return false; | ||
} | ||
|
||
mInputPath = args[argpos]; | ||
return true; | ||
} | ||
|
||
private void run(String[] args) | ||
throws Exception { | ||
|
||
if (!parseArguments(args)) { | ||
System.exit(1); | ||
} | ||
|
||
FastbReader fastbReader = new FastbReader(new File(mInputPath)); | ||
try { | ||
if (mIdListFilePath != null) { | ||
LineNumberReader reader = new LineNumberReader(new FileReader(mIdListFilePath)); | ||
while (true) { | ||
String line = reader.readLine(); | ||
if (line == null) { | ||
reader.close(); | ||
break; | ||
} | ||
Integer id = parseReadId(line); | ||
if (id == null) { | ||
continue; | ||
} | ||
if (id < 0 || id >= fastbReader.getSequenceCount()) { | ||
System.out.println("ERROR: Illegal sequence id: " + id); | ||
System.exit(1); | ||
} | ||
String sequence = fastbReader.readSequence(id); | ||
System.out.println(">" + id); | ||
System.out.println(sequence); | ||
} | ||
} else { | ||
int id = 0; | ||
while (fastbReader.hasNext()) { | ||
String sequence = fastbReader.next(); | ||
System.out.println(">" + id); | ||
System.out.println(sequence); | ||
id++; | ||
} | ||
} | ||
} finally { | ||
fastbReader.close(); | ||
} | ||
} | ||
|
||
private Integer parseReadId(String line) { | ||
String text = line.trim(); | ||
if (text.length() == 0 || text.charAt(0) == '#') { | ||
return null; | ||
} | ||
String token = text.split("\\s+")[0]; | ||
Integer id = null; | ||
try { | ||
id = new Integer(token); | ||
} catch (NumberFormatException exc) { | ||
System.out.println("ERROR: Invalid sequence id: " + token); | ||
System.exit(1); | ||
} | ||
return id; | ||
} | ||
} |
Oops, something went wrong.