@@ -62,7 +62,6 @@ class LinearSVMModel:
62
62
63
63
64
64
def data_preprocess (args ):
65
- # Load data
66
65
if args .ent :
67
66
diagrams = feature_extraction ()[0 ]
68
67
else :
@@ -72,49 +71,19 @@ def data_preprocess(args):
72
71
73
72
data_list = []
74
73
target_list = []
75
-
76
74
for task in range (1 , 56 ): # Assuming only one task for now
77
75
task_col = cast .iloc [:, task ]
78
-
79
- # Partition training sets
80
- train_set = task_col .isin ([1 , 2 ])
81
- # Generate training targets
82
- train_targets_all = np .ravel (label_binarize (task_col , classes = [1 ]))
83
- train_targets = train_targets_all [train_set ]
84
- # Partition diagrams
85
- train_data = diagrams [train_set ]
86
-
87
- # todo: Create test data by complementing the train data, pay attention to the reading position of test data
88
- test_set = train_set
89
- test_targets_all = train_targets_all
90
- test_targets = train_targets
91
- test_data = train_data
92
-
76
+
77
+ ## todo: Try to load data/target
78
+
93
79
data_list .append ((train_data , test_data ))
94
80
target_list .append ((train_targets , test_targets ))
95
-
96
- return data_list , target_list
97
-
98
- def data_preprocess_ (args ):
99
- if args .ent :
100
- diagrams = feature_extraction ()[0 ]
101
- else :
102
- diagrams = np .load ('./data/diagrams.npy' )
103
- cast = pd .read_table ('./data/SCOP40mini_sequence_minidatabase_19.cast' )
104
- cast .columns .values [0 ] = 'protein'
105
-
106
- ## todo: Try another way to load data (just not use isin and label_binarize)
107
-
108
- data_list = []
109
- target_list = []
110
-
81
+
111
82
return data_list , target_list
112
83
113
84
def main (args ):
114
- if args .d == 0 :
115
- data_list , target_list = data_preprocess (args )
116
- else :
117
- data_list , target_list = data_preprocess_ (args )
85
+
86
+ data_list , target_list = data_preprocess (args )
118
87
119
88
task_acc_train = []
120
89
task_acc_test = []
@@ -159,6 +128,5 @@ def main(args):
159
128
parser .add_argument ('--kernel' , type = str , default = 'rbf' , choices = ['linear' , 'poly' , 'rbf' , 'sigmoid' ], help = "Kernel type" )
160
129
parser .add_argument ('--C' , type = float , default = 20 , help = "Regularization parameter" )
161
130
parser .add_argument ('--ent' , action = 'store_true' , help = "Load data from a file using a feature engineering function feature_extraction() from fea.py" )
162
- parser .add_argument ('--d' , type = int , choices = [0 , 1 ], default = 0 , help = 'A flag to choose the data reading method. Default is 0.' )
163
131
args = parser .parse_args ()
164
132
main (args )
0 commit comments