flowscience
diff --git a/‎Python/ANISEED_ISHparse2.py
+79 b/‎Python/ANISEED_ISHparse2.py
+79
diff --git a/‎Python/ANISEED_ISHparseET.py
+31 b/‎Python/ANISEED_ISHparseET.py
+31
diff --git a/‎Python/DE_compare.py
+175 b/‎Python/DE_compare.py
+175
@@ -0,0 +1,84 @@
+#-------------------------------------------------------------------------------
+# Name:        module1
+# Purpose:     Flatten the experiment records of ish_ci.xml into a
+#              tab-separated table written to stdout.
+#
+# Author:      Eli
+#
+# Created:     14/04/2015
+# Copyright:   (c) Eli 2015
+# Licence:     <your licence>
+#-------------------------------------------------------------------------------
+
+import itertools
+import sys
+
+def main():
+    #Placeholder entry point; the parsing below runs at module level.
+    pass
+
+if __name__ == '__main__':
+    main()
+
+#Input file, number of leading lines to skip, and output column order
+XML_PATH = "C:/users/eli/downloads/ish/ish_ci.xml"
+HEADER_LINES = 3
+COLUMNS = ["gene", "stage", "tissue", "pattern", "original", "source", "exp", "note"]
+
+def _element_text(line):
+    """Return the text between the first '>' and the next '<' in line."""
+    return line.split(">")[1].split("<")[0]
+
+def _parse_records(lines):
+    """Yield one row of values, in COLUMNS order, per '/experiment' line.
+
+    A line containing 'experiment id' starts a record and resets every field
+    to "NA"; later lines fill fields by substring match. Lines seen before
+    the first record starts are ignored.
+    """
+    record = None
+    stage_seen = False
+    for line in lines:
+        if "experiment id" in line:
+            record = dict.fromkeys(COLUMNS, "NA")
+            record["exp"] = line.split('"')[1]
+            stage_seen = False  #every record starts with the stage column
+        if record is None:
+            continue
+
+        if "developmental_stage" in line:
+            #Matching lines alternate between the stage and source columns.
+            #One line must fill only one field, so this is if/else and not
+            #two independent ifs (which made both columns always equal).
+            if stage_seen:
+                record["source"] = _element_text(line)
+            else:
+                record["stage"] = _element_text(line)
+            stage_seen = not stage_seen
+
+        if "probe_gene_predicted" in line:
+            record["gene"] = _element_text(line)
+
+        if "staining_localization" in line:
+            record["tissue"] = _element_text(line)
+
+        if "image_note" in line:
+            note = _element_text(line)
+            record["note"] = note
+            if "Expression pattern:" in note:
+                record["pattern"] = note.split("Expression pattern:")[1].split("<")[0]
+
+            if "Original annotation:" in note:
+                record["original"] = note.split("Original annotation:")[1].split(".")[0]
+
+        #Output information for parsed record
+        if "/experiment" in line:
+            yield [record[name] for name in COLUMNS]
+
+#Read data; the context manager closes the file even if parsing fails
+with open(XML_PATH) as f_in:
+    #Print header row
+    sys.stdout.write("\t".join(COLUMNS) + '\n')
+
+    for row in _parse_records(itertools.islice(f_in, HEADER_LINES, None)):
+        sys.stdout.write("\t".join(row) + '\n')
@@ -0,0 +1,40 @@
+#-------------------------------------------------------------------------------
+# Name:        module2
+# Purpose:     Parse ish_ci.xml with ElementTree and print the text of its
+#              first "experiment" element, then the root element.
+#
+# Author:      Eli
+#
+# Created:     14/04/2015
+# Copyright:   (c) Eli 2015
+# Licence:     <your licence>
+#-------------------------------------------------------------------------------
+
+#http://effbot.org/zone/element.htm
+
+import sys
+
+try:
+    #Standard-library location of ElementTree
+    import xml.etree.ElementTree as ET
+except ImportError:
+    #Fall back to the standalone elementtree package
+    import elementtree.ElementTree as ET
+
+def main():
+    #Placeholder entry point; the code below runs at module level.
+    pass
+
+if __name__ == '__main__':
+    main()
+
+tree = ET.parse("C:/users/eli/downloads/ish/ish_ci.xml")
+
+#Text of the first "experiment" child of the root (None if there is none).
+#Parenthesised single-argument print works as a statement and as a function.
+print(tree.findtext("experiment"))
+
+#If you need the root element, use getroot
+root = tree.getroot()
+
+print(root)
@@ -0,0 +1,84 @@
+#-------------------------------------------------------------------------------
+# Name:        module1
+# Purpose:     Sort the rows of the 1dpa, 3dpa and 8dpa *_DE_fdr05_redo.txt
+#              tables into files by which of the tables share each row's ID.
+#
+# Author:      Eli
+#
+# Created:     09/04/2014
+# Copyright:   (c) Eli 2014
+# Licence:     <your licence>
+#-------------------------------------------------------------------------------
+
+def main():
+    #Placeholder entry point; the comparison below runs at module level.
+    pass
+
+if __name__ == '__main__':
+    main()
+
+#Conditions in the order they are processed and labelled in the output
+CONDITIONS = ["1dpa", "3dpa", "8dpa"]
+
+INPUT_FILES = {
+    "1dpa": "C:/rnaseq/polyA_data/clusters/1dpa_DE_fdr05_redo.txt",
+    "3dpa": "C:/rnaseq/polyA_data/clusters/3dpa_DE_fdr05_redo.txt",
+    "8dpa": "C:/rnaseq/polyA_data/clusters/8dpa_DE_fdr05_redo.txt",
+}
+
+#Output file for each combination of conditions that share an ID
+OUTPUT_FILES = {
+    ("1dpa",): "C:/RNAseq/polyA_data/clusters/edgeR_de_1dpa_unique_redo.txt",
+    ("3dpa",): "C:/RNAseq/polyA_data/clusters/edgeR_de_3dpa_unique_redo.txt",
+    ("8dpa",): "C:/RNAseq/polyA_data/clusters/edgeR_de_8dpa_unique_redo.txt",
+    ("1dpa", "3dpa"): "C:/RNAseq/polyA_data/clusters/edgeR_de_1-3dpa_redo.txt",
+    ("1dpa", "8dpa"): "C:/RNAseq/polyA_data/clusters/edgeR_de_1-8dpa_redo.txt",
+    ("3dpa", "8dpa"): "C:/RNAseq/polyA_data/clusters/edgeR_de_3-8dpa_redo.txt",
+    ("1dpa", "3dpa", "8dpa"): "C:/RNAseq/polyA_data/clusters/edgeR_de_1-3-8dpa_redo.txt",
+}
+
+def _row_id(line):
+    """Return the ID of a row: its first tab-separated field, stripped."""
+    return line.split("\t")[0].strip()
+
+def _read_rows(path):
+    """Return the non-blank data lines of path, without the header row.
+
+    Every returned line ends with a newline so appended rows never run
+    together in the output files.
+    """
+    with open(path, "r") as handle:
+        lines = handle.readlines()[1:]  #skip header row
+    rows = []
+    for line in lines:
+        if not line.strip():
+            continue
+        if not line.endswith("\n"):
+            line += "\n"
+        rows.append(line)
+    return rows
+
+#Load every table once and collect its IDs for exact, constant-time lookup
+rows_by_condition = {}
+ids_by_condition = {}
+for condition in CONDITIONS:
+    rows_by_condition[condition] = _read_rows(INPUT_FILES[condition])
+    ids_by_condition[condition] = set(
+        _row_id(line) for line in rows_by_condition[condition])
+
+#Write each row, prefixed with its condition, to exactly one output file: the
+#one for the set of conditions whose tables contain the row's ID. IDs are
+#compared whole, so an ID that is merely a substring of another never matches.
+out_files = {}  #output path -> handle, opened in append mode on first use
+try:
+    for condition in CONDITIONS:
+        for line in rows_by_condition[condition]:
+            row_id = _row_id(line)
+            shared = tuple(c for c in CONDITIONS if row_id in ids_by_condition[c])
+            path = OUTPUT_FILES[shared]
+            if path not in out_files:
+                out_files[path] = open(path, "a")
+            out_files[path].write(condition + "\t" + line)
+finally:
+    for handle in out_files.values():
+        handle.close()