-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathdemo9.py
57 lines (38 loc) · 1.59 KB
/
demo9.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
# coding: utf-8
"""AutoML time utilization preset usage for tabular datasets.
Uses the predefined AutoML pipeline structure through a simple interface, so users do not have to assemble the pipeline from blocks.
"""
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
from lightautoml.dataset.roles import DatetimeRole
from lightautoml.tasks import Task
# Demo flow: load the sample table, derive date columns, split into train/test,
# fit the TabularUtilizedAutoML preset and print ROC AUC for OOF and test predictions.

# Seed NumPy's global RNG so repeated runs use the same random state.
np.random.seed(42)

data = pd.read_csv("./data/sampled_app_train.csv")

# Reference date to which the day-offset columns below are added.
base_date = np.datetime64("2018-01-01")

# DAYS_BIRTH / DAYS_EMPLOYED are interpreted as day offsets from the reference date.
# pd.to_timedelta(..., unit="D") states the unit explicitly; the previous
# astype("timedelta64[D]") relies on a resolution that pandas >= 2.0 no longer
# accepts in astype (only s/ms/us/ns are supported there).
data["BIRTH_DATE"] = (base_date + pd.to_timedelta(data["DAYS_BIRTH"], unit="D")).astype(str)

# Positive DAYS_EMPLOYED values are clipped to 0, so EMP_DATE is never later
# than the reference date.
employed_offset = np.clip(data["DAYS_EMPLOYED"], None, 0)
data["EMP_DATE"] = (base_date + pd.to_timedelta(employed_offset, unit="D")).astype(str)

# Extra columns: a single report date for every row, a constant column and an
# all-NaN column.
# NOTE(review): the constant / all-NaN columns presumably exercise the preset's
# handling of degenerate features - confirm.
data["report_dt"] = base_date
data["constant"] = 1
data["allnan"] = np.nan

# The raw offset columns are replaced by the derived date strings above.
data.drop(["DAYS_BIRTH", "DAYS_EMPLOYED"], axis=1, inplace=True)

# Hold out 2000 rows for testing; fixed random_state keeps the split repeatable.
train, test = train_test_split(data, test_size=2000, random_state=42)

# Column roles: the target column plus "report_dt" declared as the base date.
roles = {
    "target": "TARGET",
    DatetimeRole(base_date=True, seasonality=(), base_feats=False): "report_dt",
}

task = Task("binary")

# NOTE(review): timeout is presumably in seconds - confirm against the preset docs.
automl = TabularUtilizedAutoML(
    task=task,
    timeout=600,
)

oof_pred = automl.fit_predict(train, roles=roles)
test_pred = automl.predict(test)

# Score OOF predictions only on rows that have at least one non-NaN prediction.
not_nan = np.any(~np.isnan(oof_pred.data), axis=1)

# Both scores use the same target lookup and the first prediction column.
target_col = roles["target"]
oof_score = roc_auc_score(train[target_col].values[not_nan], oof_pred.data[not_nan, 0])
test_score = roc_auc_score(test[target_col].values, test_pred.data[:, 0])

print(f"OOF score: {oof_score}")
print(f"TEST score: {test_score}")