@@ -11,6 +11,7 @@ require 'optparse'
1111#
1212
1313# Update history
14+ # 2024-07-04 Record all sample attributes in the sample attribute elements
1415# 2024-05-17 Change the way of describing Sample attributes
1516# 2022-12-23 Change handling of submission date
1617# 2022-12-22 AGD
@@ -352,9 +353,12 @@ for num, line in sample_a
352353 else
353354
# If the row has additional attribute columns (index 17 onward), store every
# non-blank cell in added_attr_h under the attribute name found at the same
# position in added_attr_name_a.
# `|| []` covers rows with fewer than 17 cells, for which line[17..-1] is nil.
(line[17..-1] || []).each_with_index { |attr, idx|
  # Skip cells that have no attribute name or no value. Going through to_s
  # keeps nil and non-String cells safe (the previous `e.empty? || e.nil?`
  # guard raised NoMethodError on nil), and stripping before the emptiness
  # test keeps whitespace-only cells from being stored as "".
  next unless added_attr_name_a[idx] && attr && !attr.to_s.strip.empty?

  # A value was entered under a column whose name is still the default
  # explanation text; reject it before anything is stored.
  raise "Replace the default explanation \" Add additional phenotypes ...\" by an user-defined attribute name." if added_attr_name_a[idx] =~ /Add additional phenotypes/i

  added_attr_h.store(added_attr_name_a[idx], attr.to_s.strip)
}
360364
@@ -896,19 +900,19 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
896900 for sam in samples_a
897901
898902 sample_set . SAMPLE ( "accession" => "" , "center_name" => center_name , "alias" => sam [ 0 ] ) { |sample |
899- sample . TITLE ( sam [ 2 ] )
903+ sample . TITLE ( sam [ 2 ] . to_s . strip )
900904 sample . SAMPLE_NAME { |sample_name |
901905
902906 sample_name . TAXON_ID ( "9606" )
903907 sample_name . SCIENTIFIC_NAME ( "Homo sapiens" )
904908 sample_name . COMMON_NAME ( "Human" )
905909
906- sample_name . DONOR_ID ( sam [ 4 ] )
910+ sample_name . DONOR_ID ( sam [ 4 ] . to_s . strip )
907911
908912 }
909913
910- sample . SAMPLE_GROUP_TYPE ( sam [ 12 ] )
911- sample . DESCRIPTION ( sam [ 3 ] )
914+ sample . SAMPLE_GROUP_TYPE ( sam [ 12 ] . to_s . strip )
915+ sample . DESCRIPTION ( sam [ 3 ] . to_s . strip )
912916
913917 # attributes
914918 sample . SAMPLE_ATTRIBUTES { |sample_attributes |
@@ -919,8 +923,26 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
919923 sample_attribute . VALUE ( sam [ 1 ] . to_s . strip )
920924 }
921925
# sample title, description and subject id
# Each pair is [attribute tag, index into sam]; one SAMPLE_ATTRIBUTE element is
# always emitted per pair, in this order, with the value converted to a
# String and stripped.
[
  ["sample_title", 2],
  ["description", 3],
  ["subject_id", 4],
].each { |tag, col|
  sample_attributes.SAMPLE_ATTRIBUTE { |sample_attribute|
    sample_attribute.TAG(tag)
    sample_attribute.VALUE(sam[col].to_s.strip)
  }
}
943+
922944 # sex
923- if sam [ 5 ]
945+ if sam [ 5 ] && ! sam [ 5 ] . empty?
924946 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
925947 sample_attribute . TAG ( "sex" )
926948 sample_attribute . VALUE ( sam [ 5 ] . strip )
@@ -930,7 +952,7 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
930952 end
931953
932954 # age
933- if sam [ 6 ]
955+ if sam [ 6 ] && ! sam [ 6 ] . empty?
934956 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
935957 sample_attribute . TAG ( "age" )
936958 sample_attribute . VALUE ( sam [ 6 ] . strip )
@@ -940,7 +962,7 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
940962 end
941963
942964 # residence
943- if sam [ 7 ]
965+ if sam [ 7 ] && ! sam [ 7 ] . empty?
944966 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
945967 sample_attribute . TAG ( "residence" )
946968 sample_attribute . VALUE ( sam [ 7 ] . strip )
@@ -950,7 +972,7 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
950972 end
951973
952974 # birth_location
953- if sam [ 8 ]
975+ if sam [ 8 ] && ! sam [ 8 ] . empty?
954976 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
955977 sample_attribute . TAG ( "birth_location" )
956978 sample_attribute . VALUE ( sam [ 8 ] . strip )
@@ -960,7 +982,7 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
960982 end
961983
962984 # disease
963- if sam [ 9 ]
985+ if sam [ 9 ] && ! sam [ 9 ] . empty?
964986 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
965987 sample_attribute . TAG ( "disease" )
966988 sample_attribute . VALUE ( sam [ 9 ] . strip )
@@ -970,7 +992,7 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
970992 end
971993
972994 # ICD10
973- if sam [ 10 ]
995+ if sam [ 10 ] && ! sam [ 10 ] . empty?
974996 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
975997 sample_attribute . TAG ( "ICD10" )
976998 sample_attribute . VALUE ( sam [ 10 ] . strip )
@@ -980,7 +1002,7 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
9801002 end
9811003
9821004 # collection_date
983- if sam [ 11 ]
1005+ if sam [ 11 ] && ! sam [ 11 ] . empty?
9841006 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
9851007 sample_attribute . TAG ( "collection_date" )
9861008 sample_attribute . VALUE ( sam [ 11 ] . strip )
@@ -990,39 +1012,39 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
9901012 end
9911013
# affection status, tissue, population, histological_type
# Each entry is [attribute tag, index into sam, strip the value before output?].
# An attribute element is emitted only when the cell is present and non-empty.
# Only affection_status is stripped; the other three values are written as-is.
[
  ["affection_status", 12, true],
  ["tissue", 13, false],
  ["population", 14, false],
  ["histological_type", 15, false],
].each { |tag, col, strip_value|
  next unless sam[col] && !sam[col].empty?

  sample_attributes.SAMPLE_ATTRIBUTE { |sample_attribute|
    sample_attribute.TAG(tag)
    sample_attribute.VALUE(strip_value ? sam[col].strip : sam[col])
  }
}
10231045
10241046 # is_tumor
1025- if sam [ 16 ]
1047+ if sam [ 16 ] && ! sam [ 16 ] . empty?
10261048 sample_attributes . SAMPLE_ATTRIBUTE { |sample_attribute |
10271049 sample_attribute . TAG ( "is_tumor" )
10281050 sample_attribute . VALUE ( sam [ 16 ] )
@@ -1046,18 +1068,20 @@ sample_f.puts xml_sample.SAMPLE_SET{|sample_set|
10461068
10471069 end
10481070
1049- else
1071+ else # if previous_sample_attrs_flag
10501072
# added attributes
# sam[17] is iterated as attribute name => value pairs (presumably the hash of
# user-defined attributes collected while reading the sheet -- confirm against
# the code that fills samples_a). A missing (nil) entry is treated as "no
# added attributes" instead of raising on nil.
sample_added_attr_h = sam[17] || {}
sample_added_attr_h.each { |attr_name, attr_value|
  # Emit an element only when both the name and the value are present and non-empty.
  next unless attr_name && !attr_name.empty? && attr_value && !attr_value.empty?

  sample_attributes.SAMPLE_ATTRIBUTE { |sample_attribute|
    sample_attribute.TAG(attr_name)
    sample_attribute.VALUE(attr_value)
  }
}
10621086
10631087 end
0 commit comments