Skip to content

Commit e690bd9

Browse files
committed
update todo
1 parent 99aa67b commit e690bd9

File tree

2 files changed

+6
-42
lines changed

2 files changed

+6
-42
lines changed

pj1/README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,6 @@ pip install numpy pandas scikit-learn biopython
3535
3. **Test Data Generation:**
3636
- Complete the generation of test data by complementing the train data. Ensure correct reading positions for test data.
3737

38-
4. **Rewrite Data Preprocess:**
39-
- Try another way to load data (without using `isin` and `label_binarize`) and implement the `data_preprocess_` function.
40-
4138
### Running the Script:
4239
- Execute the script `main.py` from the command line.
4340
- You can provide arguments to customize the model type, kernel type, regularization parameter, and data loading method.
@@ -51,7 +48,6 @@ python main.py --model_type svm --kernel rbf --C 1.0
5148

5249
1. **Complete Implementation of Protein Classification (Data Loading) - 4 points**
5350
- Ensure the code effectively reads and preprocesses protein structure data and sequences.
54-
- Both functions should work fine.
5551

5652
2. **Comparison of Linear SVM with Other Machine Learning Methods (e.g., LR) - 2 points**
5753
- Implement Linear SVM model and LR model.

pj1/main.py

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ class LinearSVMModel:
6262

6363

6464
def data_preprocess(args):
65-
# Load data
6665
if args.ent:
6766
diagrams = feature_extraction()[0]
6867
else:
@@ -72,49 +71,19 @@ def data_preprocess(args):
7271

7372
data_list = []
7473
target_list = []
75-
7674
for task in range(1, 56): # Loop over task columns 1 through 55 of the cast table
7775
task_col = cast.iloc[:, task]
78-
79-
# Partition training sets
80-
train_set = task_col.isin([1, 2])
81-
# Generate training targets
82-
train_targets_all = np.ravel(label_binarize(task_col, classes=[1]))
83-
train_targets = train_targets_all[train_set]
84-
# Partition diagrams
85-
train_data = diagrams[train_set]
86-
87-
# todo: Create test data by complementing the train data, pay attention to the reading position of test data
88-
test_set = train_set
89-
test_targets_all = train_targets_all
90-
test_targets = train_targets
91-
test_data = train_data
92-
76+
77+
## todo: Try to load data/target
78+
9379
data_list.append((train_data, test_data))
9480
target_list.append((train_targets, test_targets))
95-
96-
return data_list, target_list
97-
98-
def data_preprocess_(args):
99-
if args.ent:
100-
diagrams = feature_extraction()[0]
101-
else:
102-
diagrams = np.load('./data/diagrams.npy')
103-
cast = pd.read_table('./data/SCOP40mini_sequence_minidatabase_19.cast')
104-
cast.columns.values[0] = 'protein'
105-
106-
## todo: Try another way to load data (just not use isin and label_binarize)
107-
108-
data_list = []
109-
target_list = []
110-
81+
11182
return data_list, target_list
11283

11384
def main(args):
114-
if args.d == 0:
115-
data_list, target_list = data_preprocess(args)
116-
else:
117-
data_list, target_list = data_preprocess_(args)
85+
86+
data_list, target_list = data_preprocess(args)
11887

11988
task_acc_train = []
12089
task_acc_test = []
@@ -159,6 +128,5 @@ def main(args):
159128
parser.add_argument('--kernel', type=str, default='rbf', choices=['linear', 'poly', 'rbf', 'sigmoid'], help="Kernel type")
160129
parser.add_argument('--C', type=float, default=20, help="Regularization parameter")
161130
parser.add_argument('--ent', action='store_true', help="Load data from a file using a feature engineering function feature_extraction() from fea.py")
162-
parser.add_argument('--d', type=int, choices=[0, 1], default=0, help='A flag to choose the data reading method. Default is 0.')
163131
args = parser.parse_args()
164132
main(args)

0 commit comments

Comments
 (0)