Commit 00ed838

add new file for comparing models with ROC, add cells to roc_curve
1 parent ca0b602 commit 00ed838

2 files changed: +80 -7 lines changed


Diff for: metrics/roc_curve.py

+16 -7
@@ -1,22 +1,29 @@
+"""Example of computing ROC curve
+"""
+#%%
 import numpy as np
-from sklearn import metrics
 import matplotlib.pyplot as plt
+import sklearn.metrics as skm
 
+#y_true = np.array([0, 0, 0, 1, 0, 1, 1, 1])
 y_true = np.array([0, 0, 0, 0, 0, 1, 1, 1])
 y_score = np.array([0.1, 0.4, 0.35, 0.7, 0.2, 0.3, 0.6, 0.8])
 
-fpr, tpr, tresholds = metrics.roc_curve(y_true, y_scores, pos_label=1)
+#%%
+fpr, tpr, tresholds = skm.roc_curve(y_true, y_score, pos_label=1)
 
-print(f'\nfpr={fpr}\ntpr={tpr}\ntre={thresholds}')
+print(f'\nfpr={fpr}\ntpr={tpr}\ntre={tresholds}')
 
 
 for tr, tp, fp in zip(tresholds, tpr, fpr):
-    y_pred = (y_scores>=tr).astype(int)
-    cm = metrics.confusion_matrix(y_true, y_pred)
+    y_pred = (y_score>=tr).astype(int)
+    cm = skm.confusion_matrix(y_true, y_pred)
     print(cm)
-    print(metrics.classification_report(y_true, y_pred))
+    print(skm.classification_report(y_true, y_pred))
     print(f'Treshold={tr},tpr={tp}, fpr={fp} predicions={y_pred}')
 
+
+#%%
 plt.plot(fpr, tpr, lw=1)
 plt.scatter(fpr,tpr)
 
@@ -26,4 +33,6 @@
 plt.ylabel('True Positive Rate')
 plt.title('Receiver operating characteristic example')
 #plt.legend(loc="lower right")
-plt.show()
+plt.show()
+
+# %%
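
Aside (not part of the commit): the loop above prints one confusion matrix per threshold returned by skm.roc_curve. Below is a minimal sketch that recomputes a single (FPR, TPR) point of that curve by hand from the confusion matrix, reusing the y_true/y_score arrays from the file; the threshold value 0.35 is an arbitrary pick for illustration.

import numpy as np
import sklearn.metrics as skm

y_true = np.array([0, 0, 0, 0, 0, 1, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.7, 0.2, 0.3, 0.6, 0.8])

# classify as positive when the score reaches the threshold
threshold = 0.35
y_pred = (y_score >= threshold).astype(int)

# for binary labels scikit-learn lays the matrix out as [[tn, fp], [fn, tp]]
tn, fp, fn, tp = skm.confusion_matrix(y_true, y_pred).ravel()

# TPR = TP / (TP + FN), FPR = FP / (FP + TN)
print(f"threshold={threshold}: tpr={tp / (tp + fn):.3f}, fpr={fp / (fp + tn):.3f}")

For this data the sketch prints tpr=0.667 and fpr=0.600, the same pair the committed loop reports at that threshold, since roc_curve builds the curve from exactly these per-threshold rates.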

Diff for: metrics/roc_curve_model_comparision.py

+64
@@ -0,0 +1,64 @@
+#%%
+import matplotlib.pyplot as plt
+import numpy as np
+import sklearn.metrics as skm
+
+
+
+y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
+
+# model1 curve will dominate the model0
+# output from model0
+y_score0 = np.array([0.75, 0.5, 0.3, 0.35, 0.45, 0.7, 0.3, 0.33, 0.5, 0.8])
+# output from model1
+y_score1 = np.array([0.6, 0.3, 0.3, 0.55, 0.65, 0.4, 0.55, 0.33, 0.75, 0.3])
+
+# model 1 is better
+# # output from model0
+# y_score0 = np.array([0.75, 0.5, 0.3, 0.35, 0.45, 0.7, 0.3, 0.33, 0.5, 0.8])
+# # output from model1
+# y_score1 = np.array([0.7, 0.3, 0.3, 0.55, 0.75, 0.4, 0.5, 0.33, 0.72, 0.3])
+
+# looking only at the curves, it is not so obvious which one is better
+# output from model0
+# y_score0 = np.array([0.7, 0.45, 0.3, 0.35, 0.45, 0.7, 0.3, 0.33, 0.55, 0.8])
+# # output from model1
+# y_score1 = np.array([0.6, 0.3, 0.3, 0.55, 0.65, 0.4, 0.5, 0.33, 0.75, 0.3])
+
+
+# %
+
+
+# first model
+fpr0, tpr0, tresholds0 = skm.roc_curve(y_true, y_score0)
+
+# second model
+
+fpr1, tpr1, tresholds1 = skm.roc_curve(y_true, y_score1)
+
+
+auc_roc0 = skm.roc_auc_score(y_true, y_score0)
+auc0 = skm.auc(fpr0,tpr0)
+print(f"Model 0 auc roc={auc_roc0} area under curve={auc0}")
+
+auc_roc1 = skm.roc_auc_score(y_true, y_score1)
+auc1 = skm.auc(fpr1,tpr1)
+print(f"Model 1 auc roc={auc_roc1} area under curve={auc1}")
+
+
+# % plot curve
+plt.plot(fpr0,tpr0, 'ro')
+plt.plot(fpr0,tpr0, 'r', label='model 0')
+
+plt.plot(fpr1,tpr1, 'bo')
+plt.plot(fpr1,tpr1, 'b', label='model 1')
+
+plt.xlabel('FPR')
+plt.ylabel('TPR')
+plt.ylim([0.0, 1.05])
+plt.xlim([0.0, 1.0])
+plt.title('ROC curve for 2 ml models')
+plt.legend()
+plt.show()
+
+# %%
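
Aside (not part of the commit): the file computes each model's AUC twice, with skm.roc_auc_score on the raw scores and with skm.auc on the curve points. The two agree because skm.auc is plain trapezoidal integration over the (fpr, tpr) points. A short sketch that checks this with a hand-rolled trapezoidal rule, reusing model 0's arrays from the file:

import numpy as np
import sklearn.metrics as skm

y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
y_score0 = np.array([0.75, 0.5, 0.3, 0.35, 0.45, 0.7, 0.3, 0.33, 0.5, 0.8])

fpr0, tpr0, _ = skm.roc_curve(y_true, y_score0)

# area under the piecewise-linear curve via the trapezoidal rule:
# (width between consecutive fpr points) * (average of the two tpr values), summed
manual_auc = np.sum(np.diff(fpr0) * (tpr0[:-1] + tpr0[1:]) / 2)

# all three should print the same number
print(skm.roc_auc_score(y_true, y_score0))  # from the raw scores
print(skm.auc(fpr0, tpr0))                  # from the curve points
print(manual_auc)                           # hand-rolled trapezoids

A caveat the third commented-out scenario hints at: when two ROC curves cross, a single AUC value can still rank the models while hiding the operating regions where the other model wins, which is why the script plots both curves instead of relying on the numbers alone.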
