@@ -101,7 +101,7 @@ for meta in meta_object
101101 when "Study" then
102102 study_a . push ( [ i , line ] )
103103 when "Sample" then
104- sample_a . push ( [ i , line ] )
104+ sample_a . push ( [ i , line . map ( & :to_s ) ] )
105105 when "Experiment" then
106106 experiment_a . push ( [ i , line ] )
107107 when "Data" then
@@ -133,9 +133,7 @@ for num, line in submission_a
133133 submission_h . store ( "nbdc_number" , submission_a [ i +1 ] [ 1 ] [ 0 ] )
134134 end
135135
136- if line [ 0 ] == "Hold/Release"
137- submission_h . store ( "hold" , submission_a [ i +1 ] [ 1 ] [ 0 ] )
138- end
136+ submission_h . store ( "hold" , "Hold" )
139137
140138 if line [ 0 ] == "Contacts"
141139 j = num
@@ -325,11 +323,11 @@ for num, line in sample_a
325323
326324 raise "Added Sample attributes in the previous format. Use -r option." if line [ 11 ] == "Attributes"
327325
328- line [ 1 ..10 ] . each { |attr_name |
326+ line [ 1 ..16 ] . each { |attr_name |
329327 fixed_attr_name_a . push ( attr_name )
330328 }
331329
332- line [ 11 ..-1 ] . each { |attr_name |
330+ line [ 17 ..-1 ] . each { |attr_name |
333331 if attr_name && attr_name . to_s
334332 raise "Added attribute is included in fixed attributes: #{ attr_name . to_s } " if fixed_attr_name_a . map ( &:downcase ) . include? ( attr_name . to_s . downcase )
335333 added_attr_name_a . push ( attr_name . to_s . strip )
@@ -340,36 +338,31 @@ for num, line in sample_a
340338
341339 end
342340
343- if /^Sample-\d {1,6}/ =~ line [ 0 ]
344-
341+ if /^Sample-\d {1,6}/ =~ line [ 0 ] && line [ 2 ] && ! line [ 2 ] . empty?
342+
345343 added_attr_h = Hash . new
346344
347345 # alias
348346 sample_number = line [ 0 ] . split ( "-" ) [ 1 ] . to_i
349347 sample_alias = submission_id + "_Sample_" + sprintf ( "%06d" , line [ 0 ] . split ( "-" ) [ 1 ] . to_i )
350348 sample_aliases_a . push ( sample_alias )
351-
352- # Title があれば。追加属性は hash で格納
353- if line [ 4 ]
349+
350+ if previous_sample_attrs_flag
351+ samples_a . push ( [ sample_alias , line [ 1 ] , line [ 2 ] , line [ 3 ] , line [ 4 ] , line [ 5 ] , line [ 6 ] , line [ 7 ] , line [ 8 ] , line [ 9 ] , line [ 10 ] , line [ 11 ] , line [ 12 ] , line [ 13 ] , line [ 14 ] , line [ 15 ] , line [ 16 ] ] )
352+ else
354353
355- if previous_sample_attrs_flag
356- samples_a . push ( [ sample_alias , line [ 1 ] , line [ 2 ] , line [ 3 ] , line [ 4 ] , line [ 5 ] , line [ 6 ] , line [ 7 ] , line [ 8 ] , line [ 9 ] , line [ 10 ] , line [ 11 ] ] )
357- else
358-
359- # 追加属性があれば
360- if line [ 11 ..-1 ] . size > 0
361- line [ 11 ..-1 ] . each_with_index { |attr , idx |
362- added_attr_h . store ( added_attr_name_a [ idx ] , attr . to_s . strip ) if added_attr_name_a [ idx ] && attr && attr . to_s
363- }
364- end
365-
366- samples_a . push ( [ sample_alias , line [ 1 ] , line [ 2 ] , line [ 3 ] , line [ 4 ] , line [ 5 ] , line [ 6 ] , line [ 7 ] , line [ 8 ] , line [ 9 ] , line [ 10 ] , added_attr_h ] )
367-
354+ # If there are any added attributes
355+ if line [ 17 ..-1 ] . size > 0
356+ line [ 17 ..-1 ] . each_with_index { |attr , idx |
357+ added_attr_h . store ( added_attr_name_a [ idx ] , attr . to_s . strip ) if added_attr_name_a [ idx ] && attr && attr . to_s
358+ }
368359 end
369-
370- end # if line[4]
371360
372- end
361+ samples_a . push ( [ sample_alias , line [ 1 ] , line [ 2 ] , line [ 3 ] , line [ 4 ] , line [ 5 ] , line [ 6 ] , line [ 7 ] , line [ 8 ] , line [ 9 ] , line [ 10 ] , line [ 11 ] , line [ 12 ] , line [ 13 ] , line [ 14 ] , line [ 15 ] , line [ 16 ] , added_attr_h ] )
362+
363+ end
364+
365+ end # if /^Sample-\d{1,6}/ =~ line[0]
373366
374367end
375368
@@ -900,21 +893,22 @@ study_f.puts xml_study.STUDY_SET{|study_set|
900893# Sample
901894sample_f . puts xml_sample . SAMPLE_SET { |sample_set |
902895
903- for sam in samples_a
904- sample_set . SAMPLE ( "accession" => "" , "center_name" => center_name , "alias" => sam [ 0 ] ) { |sample |
905- sample . TITLE ( sam [ 4 ] )
896+ for sam in samples_a
897+
898+ sample_set . SAMPLE ( "accession" => "" , "center_name" => center_name , "alias" => sam [ 0 ] ) { |sample |
899+ sample . TITLE ( sam [ 2 ] )
906900 sample . SAMPLE_NAME { |sample_name |
907901
908902 sample_name . TAXON_ID ( "9606" )
909903 sample_name . SCIENTIFIC_NAME ( "Homo sapiens" )
910904 sample_name . COMMON_NAME ( "Human" )
911905
912- sample_name . DONOR_ID ( sam [ 2 ] )
906+ sample_name . DONOR_ID ( sam [ 4 ] )
913907
914908 }
915909
916- sample . SAMPLE_GROUP_TYPE ( sam [ 6 ] )
917- sample . DESCRIPTION ( sam [ 5 ] )
910+ sample . SAMPLE_GROUP_TYPE ( sam [ 12 ] )
911+ sample . DESCRIPTION ( sam [ 3 ] )
918912
919913 # attributes
920914 sample . SAMPLE_ATTRIBUTES { |sample_attributes |
@@ -925,63 +919,125 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
925919 sample_attribute . VALUE ( sam [ 1 ] . to_s . strip )
926920 }
927921
928- # gender
929- if sam [ 3 ]
922+ # sex
923+ if sam [ 5 ]
930924 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
931- sample_attribute . TAG ( "gender " )
932- sample_attribute . VALUE ( sam [ 3 ] . strip )
925+ sample_attribute . TAG ( "sex " )
926+ sample_attribute . VALUE ( sam [ 5 ] . strip )
933927
934928 first = false
935929 }
936930 end
937931
938- # affection status
932+ # age
939933 if sam [ 6 ]
940934 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
941- sample_attribute . TAG ( "affection_status " )
935+ sample_attribute . TAG ( "age " )
942936 sample_attribute . VALUE ( sam [ 6 ] . strip )
937+
938+ first = false
943939 }
944940 end
945941
946- # tissue
942+ # residence
947943 if sam [ 7 ]
944+ sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
945+ sample_attribute . TAG ( "residence" )
946+ sample_attribute . VALUE ( sam [ 7 ] . strip )
947+
948+ first = false
949+ }
950+ end
951+
952+ # birth_location
953+ if sam [ 8 ]
954+ sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
955+ sample_attribute . TAG ( "birth_location" )
956+ sample_attribute . VALUE ( sam [ 8 ] . strip )
957+
958+ first = false
959+ }
960+ end
961+
962+ # disease
963+ if sam [ 9 ]
964+ sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
965+ sample_attribute . TAG ( "disease" )
966+ sample_attribute . VALUE ( sam [ 9 ] . strip )
967+
968+ first = false
969+ }
970+ end
971+
972+ # ICD10
973+ if sam [ 10 ]
974+ sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
975+ sample_attribute . TAG ( "ICD10" )
976+ sample_attribute . VALUE ( sam [ 10 ] . strip )
977+
978+ first = false
979+ }
980+ end
981+
982+ # collection_date
983+ if sam [ 11 ]
984+ sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
985+ sample_attribute . TAG ( "collection_date" )
986+ sample_attribute . VALUE ( sam [ 11 ] . strip )
987+
988+ first = false
989+ }
990+ end
991+
992+ # affection status
993+ if sam [ 12 ]
994+ sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
995+ sample_attribute . TAG ( "affection_status" )
996+ sample_attribute . VALUE ( sam [ 12 ] . strip )
997+ }
998+ end
999+
1000+ # tissue
1001+ if sam [ 13 ]
9481002 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
9491003 sample_attribute . TAG ( "tissue" )
950- sample_attribute . VALUE ( sam [ 7 ] )
1004+ sample_attribute . VALUE ( sam [ 13 ] )
9511005 }
9521006 end
9531007
9541008 # population
955- if sam [ 8 ]
1009+ if sam [ 14 ]
9561010 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
9571011 sample_attribute . TAG ( "population" )
958- sample_attribute . VALUE ( sam [ 8 ] )
1012+ sample_attribute . VALUE ( sam [ 14 ] )
9591013 }
9601014 end
9611015
9621016 # histological_type
963- if sam [ 9 ]
1017+ if sam [ 15 ]
9641018 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
9651019 sample_attribute . TAG ( "histological_type" )
966- sample_attribute . VALUE ( sam [ 9 ] )
1020+ sample_attribute . VALUE ( sam [ 15 ] )
9671021 }
9681022 end
9691023
9701024 # is_tumor
971- if sam [ 10 ]
1025+ if sam [ 16 ]
9721026 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
9731027 sample_attribute . TAG ( "is_tumor" )
974- sample_attribute . VALUE ( sam [ 10 ] )
1028+ sample_attribute . VALUE ( sam [ 16 ] )
9751029 }
9761030 end
9771031
9781032 if previous_sample_attrs_flag
9791033
980- if sam [ 11 ] && sam [ 11 ] . split ( ";" )
1034+ if sam [ 17 ] && sam [ 17 ] . split ( ";" )
9811035
982- sam [ 11 ] . split ( ";" ) . each { |phenotype |
1036+ sam [ 17 ] . split ( ";" ) . each { |phenotype |
9831037 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
984- pp phenotype if phenotype . strip . split ( ":" ) [ 0 ] . nil?
1038+ if phenotype . strip . split ( ":" ) [ 0 ] . nil?
1039+ puts "Invalid sample attribute: #{ phenotype } "
1040+ end
9851041
9861042 sample_attribute . TAG ( phenotype . strip . split ( ":" ) [ 0 ] . strip )
9871043 sample_attribute . VALUE ( phenotype . strip . split ( ":" ) [ 1 ] . strip )
@@ -991,10 +1047,10 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
9911047 end
9921048
9931049 else
994-
995- # added attributes
1050+
1051+ # added attributes
9961052 sample_added_attr_h = Hash . new
997- sample_added_attr_h = sam [ 11 ]
1053+ sample_added_attr_h = sam [ 17 ]
9981054 unless sample_added_attr_h . empty?
9991055 sample_added_attr_h . each { |attr_name , attr_value |
10001056 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
0 commit comments