196 changes: 23 additions & 173 deletions decision_tree_ensembles/tree_ensemble_example.ipynb
@@ -203,29 +203,15 @@
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"clf = DecisionTreeClassifier(random_state=0)\n",
"clf = RandomForestClassifier(random_state=0)\n",
"clf = AdaBoostClassifier(random_state=0)\n",
"clf = GradientBoostingClassifier(random_state=0)\n"
]
}
],
"outputs": [],
"source": [
"clf_list = []\n",
"model_name_list = ['DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier']\n",
"model_name_list = [DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier]\n",
"\n",
"# train model\n",
"for model_name in model_name_list:\n",
"\n",
" # dynamically write command to train each classifier in model_name_list\n",
" cmd = 'clf = ' + model_name + '(random_state=0)'\n",
" print(cmd) # print command\n",
" exec(cmd) # execute command\n",
" clf = model_name(random_state=0)\n",
" \n",
" # fit data to model\n",
" clf = clf.fit(X_train, y_train)\n",
@@ -241,12 +227,7 @@
"outputs": [
{
"data": {
"text/plain": [
"[DecisionTreeClassifier(random_state=0),\n",
" RandomForestClassifier(random_state=0),\n",
" AdaBoostClassifier(random_state=0),\n",
" GradientBoostingClassifier(random_state=0)]"
]
"text/plain": "[DecisionTreeClassifier(random_state=0),\n RandomForestClassifier(random_state=0),\n AdaBoostClassifier(random_state=0),\n GradientBoostingClassifier(random_state=0)]"
},
"execution_count": 11,
"metadata": {},
@@ -268,176 +249,45 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"y_pred_train = clf.predict(X_train)\n",
"prec_val_list.append(precision_score(y_train, y_pred_train))\n",
"prec_val_list.append(recall_score(y_train, y_pred_train))\n",
"prec_val_list.append(f1_score(y_train, y_pred_train))\n",
"\n",
"y_pred_test = clf.predict(X_test)\n",
"prec_val_list.append(precision_score(y_test, y_pred_test))\n",
"prec_val_list.append(recall_score(y_test, y_pred_test))\n",
"prec_val_list.append(f1_score(y_test, y_pred_test))\n",
"\n",
"y_pred_train = clf.predict(X_train)\n",
"prec_val_list.append(precision_score(y_train, y_pred_train))\n",
"prec_val_list.append(recall_score(y_train, y_pred_train))\n",
"prec_val_list.append(f1_score(y_train, y_pred_train))\n",
"\n",
"y_pred_test = clf.predict(X_test)\n",
"prec_val_list.append(precision_score(y_test, y_pred_test))\n",
"prec_val_list.append(recall_score(y_test, y_pred_test))\n",
"prec_val_list.append(f1_score(y_test, y_pred_test))\n",
"\n",
"y_pred_train = clf.predict(X_train)\n",
"prec_val_list.append(precision_score(y_train, y_pred_train))\n",
"prec_val_list.append(recall_score(y_train, y_pred_train))\n",
"prec_val_list.append(f1_score(y_train, y_pred_train))\n",
"\n",
"y_pred_test = clf.predict(X_test)\n",
"prec_val_list.append(precision_score(y_test, y_pred_test))\n",
"prec_val_list.append(recall_score(y_test, y_pred_test))\n",
"prec_val_list.append(f1_score(y_test, y_pred_test))\n",
"\n",
"y_pred_train = clf.predict(X_train)\n",
"prec_val_list.append(precision_score(y_train, y_pred_train))\n",
"prec_val_list.append(recall_score(y_train, y_pred_train))\n",
"prec_val_list.append(f1_score(y_train, y_pred_train))\n",
"\n",
"y_pred_test = clf.predict(X_test)\n",
"prec_val_list.append(precision_score(y_test, y_pred_test))\n",
"prec_val_list.append(recall_score(y_test, y_pred_test))\n",
"prec_val_list.append(f1_score(y_test, y_pred_test))\n",
"\n"
]
}
],
"outputs": [],
"source": [
"performance_dict = {}\n",
"\n",
"dataset_name_list = ['train', 'test']\n",
"metric_name_list = ['precision', 'recall', 'f1']\n",
"dataset_list = [\n",
" (X_train, y_train, \"train\"),\n",
" (X_test, y_test, \"test\"),\n",
"]\n",
"metric_name_list = {precision_score: \"precision\", recall_score: \"recall\", f1_score: \"f1\"}\n",
"col_name_list = []\n",
"\n",
"for i in range(len(model_name_list)):\n",
" \n",
"\n",
" clf = clf_list[i]\n",
" \n",
"\n",
" prec_val_list = []\n",
" col_name_list = []\n",
" \n",
" for dataset_name in dataset_name_list:\n",
"\n",
" cmd = 'y_pred_' + dataset_name + ' = clf.predict(X_' + dataset_name + ')'\n",
" print(cmd)\n",
" exec(cmd)\n",
" \n",
" for metric_name in metric_name_list:\n",
" \n",
" cmd = 'prec_val_list.append(' + metric_name + '_score(y_' + dataset_name + ', y_pred_' + dataset_name + '))'\n",
" print(cmd)\n",
" exec(cmd)\n",
" for x_dataset, y_dataset, col_name in dataset_list:\n",
" y_pred = clf.predict(x_dataset)\n",
"\n",
" for metric_func, pretty_name in metric_name_list.items():\n",
" prec_val_list.append(metric_func(y_dataset, y_pred))\n",
" col_name_list.append(pretty_name + '_' + col_name)\n",
"\n",
" col_name_list.append(metric_name + '_' + dataset_name)\n",
" print('')\n",
" performance_dict[model_name_list[i].__name__] = prec_val_list\n",
"\n",
" performance_dict[model_name_list[i]] = prec_val_list\n",
" \n",
"df_performance = np.round(pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list),3)"
"df_performance = np.round(pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list),3)\n"
]
},
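The refactored metrics cell follows the same idea: call the metric functions directly through a dict of function-to-label pairs rather than exec()-ing assembled strings. A sketch continuing from the training snippet above (it reuses X_train, X_test, y_train, y_test, clf_list, and model_name_list from there):

```python
# Score each fitted model by calling the metric functions directly.
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

dataset_list = [
    (X_train, y_train, "train"),
    (X_test, y_test, "test"),
]
metric_name_list = {precision_score: "precision", recall_score: "recall", f1_score: "f1"}

performance_dict = {}
for i, clf in enumerate(clf_list):
    prec_val_list = []
    col_name_list = []  # rebuilt per model; every model yields the same columns
    for x_dataset, y_dataset, col_name in dataset_list:
        y_pred = clf.predict(x_dataset)
        for metric_func, pretty_name in metric_name_list.items():
            prec_val_list.append(metric_func(y_dataset, y_pred))
            col_name_list.append(pretty_name + '_' + col_name)
    # model_name_list now holds classes, so use the class name as the row label
    performance_dict[model_name_list[i].__name__] = prec_val_list

df_performance = np.round(
    pd.DataFrame.from_dict(performance_dict, orient='index', columns=col_name_list), 3)
```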
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>precision_train</th>\n",
" <th>recall_train</th>\n",
" <th>f1_train</th>\n",
" <th>precision_test</th>\n",
" <th>recall_test</th>\n",
" <th>f1_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>DecisionTreeClassifier</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.952</td>\n",
" <td>0.896</td>\n",
" <td>0.923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RandomForestClassifier</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.985</td>\n",
" <td>0.955</td>\n",
" <td>0.970</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AdaBoostClassifier</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.956</td>\n",
" <td>0.970</td>\n",
" <td>0.963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>GradientBoostingClassifier</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.970</td>\n",
" <td>0.970</td>\n",
" <td>0.970</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" precision_train recall_train f1_train \\\n",
"DecisionTreeClassifier 1.0 1.0 1.0 \n",
"RandomForestClassifier 1.0 1.0 1.0 \n",
"AdaBoostClassifier 1.0 1.0 1.0 \n",
"GradientBoostingClassifier 1.0 1.0 1.0 \n",
"\n",
" precision_test recall_test f1_test \n",
"DecisionTreeClassifier 0.952 0.896 0.923 \n",
"RandomForestClassifier 0.985 0.955 0.970 \n",
"AdaBoostClassifier 0.956 0.970 0.963 \n",
"GradientBoostingClassifier 0.970 0.970 0.970 "
]
"text/plain": " precision_train recall_train f1_train \\\nDecisionTreeClassifier 1.0 1.0 1.0 \nRandomForestClassifier 1.0 1.0 1.0 \nAdaBoostClassifier 1.0 1.0 1.0 \nGradientBoostingClassifier 1.0 1.0 1.0 \n\n precision_test recall_test f1_test \nDecisionTreeClassifier 0.952 0.896 0.923 \nRandomForestClassifier 0.985 0.955 0.970 \nAdaBoostClassifier 0.956 0.970 0.963 \nGradientBoostingClassifier 0.970 0.970 0.970 ",
@teastburn (author) commented Dec 23, 2023:
I'm not sure why my PyCharm/Jupyter setup reformatted this; the previous formatting was more readable. But I wanted to be sure the output content was unchanged, so I included it here.
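One way to see that this reformatting is purely cosmetic: nbformat allows a multiline output string to be stored either as a list of strings or as a single string, and joining the list reproduces the string. An illustrative check (not part of the PR), using the classifier-list output from the earlier hunk:

```python
# The pre-PR list-of-strings form of the text/plain output...
list_form = [
    "[DecisionTreeClassifier(random_state=0),\n",
    " RandomForestClassifier(random_state=0),\n",
    " AdaBoostClassifier(random_state=0),\n",
    " GradientBoostingClassifier(random_state=0)]",
]
# ...and the post-PR single-string form.
single_form = ("[DecisionTreeClassifier(random_state=0),\n"
               " RandomForestClassifier(random_state=0),\n"
               " AdaBoostClassifier(random_state=0),\n"
               " GradientBoostingClassifier(random_state=0)]")
assert "".join(list_form) == single_form  # identical content, different JSON shape
```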

"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>precision_train</th>\n <th>recall_train</th>\n <th>f1_train</th>\n <th>precision_test</th>\n <th>recall_test</th>\n <th>f1_test</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>DecisionTreeClassifier</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.952</td>\n <td>0.896</td>\n <td>0.923</td>\n </tr>\n <tr>\n <th>RandomForestClassifier</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.985</td>\n <td>0.955</td>\n <td>0.970</td>\n </tr>\n <tr>\n <th>AdaBoostClassifier</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.956</td>\n <td>0.970</td>\n <td>0.963</td>\n </tr>\n <tr>\n <th>GradientBoostingClassifier</th>\n <td>1.0</td>\n <td>1.0</td>\n <td>1.0</td>\n <td>0.970</td>\n <td>0.970</td>\n <td>0.970</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 13,
"metadata": {},