-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodeling.py
More file actions
89 lines (79 loc) · 2.84 KB
/
modeling.py
File metadata and controls
89 lines (79 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from xgboost import XGBRegressor, XGBClassifier
import numpy as np #type:ignore
import xgboost as xgb
import pandas as pd #type:ignore
# Shared XGBoost keyword arguments; every estimator in this module is
# constructed with these settings unpacked into its constructor.
GPU_CONFIG = dict(
    tree_method='hist',
    device='cuda:0',
    enable_categorical=False,
    verbosity=0,
)
def _prepare_data(X, y=None):
    """Convert features (and optionally targets) to plain NumPy arrays.

    Args:
        X: Array-like feature matrix. Converted to a C-contiguous
            ``float32`` array.
        y: Optional array-like of targets. Integer and boolean targets
            (e.g. class labels) become ``int32``; everything else
            (e.g. continuous regression targets) becomes ``float32``.

    Returns:
        A ``(X, y)`` tuple of converted arrays; ``y`` is ``None`` when no
        targets were supplied.
    """
    X = np.asarray(X, dtype=np.float32, order='C')
    if y is not None:
        y = np.asarray(y)
        # Pick the dtype from the values, not from the container type:
        # casting based on "is it a pandas Series?" truncated float targets
        # that arrived as an ndarray or list down to integers.
        target_dtype = np.int32 if y.dtype.kind in 'biu' else np.float32
        y = y.astype(target_dtype, copy=False)
    return X, y
# Classification Models
def train_classification_model(X_train, y_train):
    """Fit an XGBoost classifier on the supplied training data.

    The inputs are first normalised by ``_prepare_data``; the estimator is
    built from ``GPU_CONFIG`` plus ``use_label_encoder=False``.

    Returns:
        The fitted ``XGBClassifier``.
    """
    # NOTE(review): ``use_label_encoder`` is flagged as deprecated in recent
    # XGBoost releases -- confirm whether it can be dropped.
    features, labels = _prepare_data(X_train, y_train)
    classifier = XGBClassifier(use_label_encoder=False, **GPU_CONFIG)
    classifier.fit(features, labels)
    return classifier
def evaluate_classification_model(model, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Array-like feature matrix.
        y_test: Array-like of true labels.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``. Precision, recall
        and F1 are computed with ``average="weighted"``.
    """
    features, truth = _prepare_data(X_test, y_test)
    predicted = model.predict(features)

    accuracy = accuracy_score(truth, predicted)
    weighted_scores = tuple(
        metric(truth, predicted, average="weighted")
        for metric in (precision_score, recall_score, f1_score)
    )
    return (accuracy, *weighted_scores)
def tune_classification_model(X_train, y_train):
    """Grid-search XGBoost classifier hyperparameters.

    Candidates are compared by accuracy using a 5-split ``TimeSeriesSplit``.

    Returns:
        A ``(best_estimator, best_params)`` tuple taken from the fitted
        ``GridSearchCV``.
    """
    features, labels = _prepare_data(X_train, y_train)

    base_classifier = XGBClassifier(use_label_encoder=False, **GPU_CONFIG)
    # 2 * 3 * 2 * 2 * 2 = 48 candidate parameter combinations.
    search_space = dict(
        n_estimators=[100, 200],
        max_depth=[3, 6, 9],
        learning_rate=[0.01, 0.1],
        subsample=[0.8, 1.0],
        colsample_bytree=[0.8, 1.0],
    )
    splitter = TimeSeriesSplit(n_splits=5)

    search = GridSearchCV(
        estimator=base_classifier,
        param_grid=search_space,
        cv=splitter,
        scoring='accuracy',
    )
    search.fit(features, labels)
    return search.best_estimator_, search.best_params_
# Regression Models
def train_regression_model(X_train, y_train):
    """Fit an XGBoost regressor with a squared-error objective.

    The inputs are first normalised by ``_prepare_data``; the estimator is
    built from ``GPU_CONFIG`` plus ``objective='reg:squarederror'``.

    Returns:
        The fitted ``XGBRegressor``.
    """
    features, targets = _prepare_data(X_train, y_train)
    regressor = XGBRegressor(objective='reg:squarederror', **GPU_CONFIG)
    regressor.fit(features, targets)
    return regressor
def tune_regression_model(X_train, y_train):
    """Grid-search XGBoost regressor hyperparameters.

    Candidates are compared by negative mean squared error using a 5-split
    ``TimeSeriesSplit``.

    Args:
        X_train: Array-like feature matrix.
        y_train: Array-like of regression targets.

    Returns:
        A ``(best_estimator, best_params)`` tuple taken from the fitted
        ``GridSearchCV``.
    """
    X, y = _prepare_data(X_train, y_train)
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 6, 9],
        'learning_rate': [0.01, 0.1],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
    }
    grid_search = GridSearchCV(
        XGBRegressor(**GPU_CONFIG, objective='reg:squarederror'),
        param_grid,
        # Pass the splitter itself (as the classification tuner does) rather
        # than a one-shot ``.split(X_train)`` generator: the folds are the
        # same, but they are derived from the data actually given to
        # ``fit`` and the ``cv`` object stays reusable.
        cv=TimeSeriesSplit(n_splits=5),
        scoring='neg_mean_squared_error',
    )
    grid_search.fit(X, y)
    return grid_search.best_estimator_, grid_search.best_params_
def evaluate_regression_model(model, X_test, y_test):
    """Score a fitted regressor on held-out data.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Array-like feature matrix.
        y_test: Array-like of true target values.

    Returns:
        A 3-tuple ``(mse, rmse, r2)`` where ``rmse`` is the square root of
        ``mse``.
    """
    features, truth = _prepare_data(X_test, y_test)
    predicted = model.predict(features)

    squared_error = mean_squared_error(truth, predicted)
    root_error = np.sqrt(squared_error)
    explained = r2_score(truth, predicted)
    return squared_error, root_error, explained