From eb7f133bf67892daa52f01ddb01928ae606afcbc Mon Sep 17 00:00:00 2001
From: AIgoGracia <jse0616@gmail.com>
Date: Sun, 21 Sep 2025 21:36:58 +0900
Subject: [PATCH 1/6] policy aipw v1

---
 book/cate_and_policy/policy_learning.ipynb | 65 ++++++++++++++++++++++
 1 file changed, 65 insertions(+)
diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
index 7d25a78..5182a2f 100644
--- a/book/cate_and_policy/policy_learning.ipynb
+++ b/book/cate_and_policy/policy_learning.ipynb
@@ -32,6 +32,71 @@
     "        async>\n",
     "</script>"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 라이브러리와 난수 고정\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.patches import Rectangle\n",
+    "rng = np.random.default_rng(42)\n",
+    "\n",
+    "# 데이터 생성\n",
+    "n = 1000\n",
+    "p = 4\n",
+    "X = rng.random((n, p))                   # 0~1 균등분포 공변량 4개\n",
+    "W = rng.binomial(1, 0.5, size=n)         # 무작위 처치(0/1), 확률 0.5\n",
+    "Y = 0.5*(X[:, 0] - 0.5) + (X[:, 1] - 0.5)*W + 0.1*rng.normal(size=n)\n",
+    "\n",
+    "# 시각화\n",
+    "y_norm = 1 - (Y - Y.min())/(Y.max() - Y.min())  # 0~1로 정규화\n",
+    "gray_colors = np.array([str(v) for v in y_norm])\n",
+    "\n",
+    "plt.scatter(X[:, 0], X[:, 1], c=gray_colors, s=60, marker='o',\n",
+    "            edgecolors='k', linewidths=0.5)\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.patches import Rectangle\n",
+    "\n",
+    "plt.figure(figsize=(6, 5))\n",
+    "\n",
+    "# 1) 구역 칠하기 (사각형 3개)\n",
+    "col_treat = (0.25, 0.69, 0.65, 0.35)   # 초록 투명\n",
+    "col_notreat = (0.996, 0.754, 0.027, 0.35)  # 노랑 투명\n",
+    "\n",
+    "# 왼쪽(0~0.5, 전체 y)\n",
+    "plt.gca().add_patch(Rectangle((-.1, -.1), 0.6, 1.2, facecolor=col_notreat, edgecolor='none', hatch='///'))\n",
+    "# 오른쪽 아래(0.5~1, 0~0.5)\n",
+    "plt.gca().add_patch(Rectangle((0.5, -.1), 0.6, 0.6, facecolor=col_notreat, edgecolor='none', hatch='///'))\n",
+    "# 오른쪽 위(0.5~1, 0.5~1)\n",
+    "plt.gca().add_patch(Rectangle((0.5, 0.5), 0.6, 0.6, facecolor=col_treat, edgecolor='none', hatch='///'))\n",
+    "\n",
+    "# 2) 점 찍기\n",
+    "plt.scatter(X[W==0,0], X[W==0,1],\n",
+    "            c=y_norm[W==0], cmap='gray', vmin=0, vmax=1,\n",
+    "            s=60, marker='^', edgecolors='k', linewidths=0.5,\n",
+    "            label=\"Untreated\")\n",
+    "plt.scatter(X[W==1,0], X[W==1,1],\n",
+    "            c=y_norm[W==1], cmap='gray', vmin=0, vmax=1,\n",
+    "            s=60, marker='o', edgecolors='k', linewidths=0.5,\n",
+    "            label=\"Treated\")\n",
+    "\n",
+    "# 3) 텍스트 라벨 붙이기\n",
+    "plt.text(0.75, 0.75, \"TREAT (A)\", fontsize=14, ha='center', va='center')\n",
+    "plt.text(0.25, 0.25, \"DO NOT TREAT (A^C)\", fontsize=12, ha='center', va='center')\n",
+    "\n",
+    "plt.xlim(-0.1, 1.1)\n",
+    "plt.ylim(-0.1, 1.1)\n",
+    "plt.xlabel(\"X1\"); plt.ylabel(\"X2\")\n",
+    "plt.title(\"Policy Regions with Treated vs Untreated\")\n",
+    "plt.legend()\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n"
+   ]
   }
  ],
  "metadata": {

From 1a6025b5e8e503c56ad4ec315c4eb04c9c07e7a1 Mon Sep 17 00:00:00 2001
From: AIgoGracia <jse0616@gmail.com>
Date: Sun, 28 Sep 2025 00:10:20 +0900
Subject: [PATCH 2/6] policy aipw v2

---
 book/cate_and_policy/policy_learning.ipynb | 269 +++++++++++++++++----
 1 file changed, 227 insertions(+), 42 deletions(-)

diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
index 5182a2f..f6c07bc 100644
--- a/book/cate_and_policy/policy_learning.ipynb
+++ b/book/cate_and_policy/policy_learning.ipynb
@@ -39,63 +39,248 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# 라이브러리와 난수 고정\n",
     "import numpy as np\n",
     "import matplotlib.pyplot as plt\n",
     "from matplotlib.patches import Rectangle\n",
-    "rng = np.random.default_rng(42)\n",
+    "import matplotlib.patches as mpatches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set random seed for reproducibility\n",
+    "np.random.seed(42)\n",
     "\n",
-    "# 데이터 생성\n",
+    "# Generate data\n",
     "n = 1000\n",
     "p = 4\n",
-    "X = rng.random((n, p))                   # 0~1 균등분포 공변량 4개\n",
-    "W = rng.binomial(1, 0.5, size=n)         # 무작위 처치(0/1), 확률 0.5\n",
-    "Y = 0.5*(X[:, 0] - 0.5) + (X[:, 1] - 0.5)*W + 0.1*rng.normal(size=n)\n",
+    "X = np.random.uniform(0, 1, (n, p))\n",
+    "W = np.random.binomial(1, 0.5, n)  # Independent from X and Y\n",
+    "Y = 0.5 * (X[:, 0] - 0.5) + (X[:, 1] - 0.5) * W + 0.1 * np.random.randn(n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize Y for plotting\n",
+    "y_norm = 1 - (Y - Y.min()) / (Y.max() - Y.min())\n",
+    "\n",
+    "# First plot: All data points\n",
+    "fig1, ax1 = plt.subplots(1, 1, figsize=(8, 6))\n",
+    "for i in range(n):\n",
+    "    if W[i] == 1:\n",
+    "        ax1.scatter(X[i, 0], X[i, 1], marker='o', s=100, \n",
+    "                   c=[y_norm[i]], cmap='gray', vmin=0, vmax=1, \n",
+    "                   edgecolors='black', linewidths=1)\n",
+    "    else:\n",
+    "        ax1.scatter(X[i, 0], X[i, 1], marker='D', s=80, \n",
+    "                   c=[y_norm[i]], cmap='gray', vmin=0, vmax=1,\n",
+    "                   edgecolors='black', linewidths=1)\n",
+    "ax1.set_xlabel('X1', fontsize=12)\n",
+    "ax1.set_ylabel('X2', fontsize=12)\n",
+    "ax1.set_title('All Data Points (○: Treated, ◇: Untreated)', fontsize=14)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Second plot: Separated by treatment\n",
+    "fig2, (ax2, ax3) = plt.subplots(1, 2, figsize=(14, 6))\n",
     "\n",
-    "# 시각화\n",
-    "y_norm = 1 - (Y - Y.min())/(Y.max() - Y.min())  # 0~1로 정규화\n",
-    "gray_colors = np.array([str(v) for v in y_norm])\n",
+    "# Untreated group\n",
+    "untreated_idx = W == 0\n",
+    "ax2.scatter(X[untreated_idx, 0], X[untreated_idx, 1], marker='D', s=80, \n",
+    "           c=y_norm[untreated_idx], cmap='gray', vmin=0, vmax=1,\n",
+    "           edgecolors='black', linewidths=1)\n",
+    "ax2.set_xlabel('X1', fontsize=12)\n",
+    "ax2.set_ylabel('X2', fontsize=12)\n",
+    "ax2.set_title('Untreated', fontsize=14)\n",
     "\n",
-    "plt.scatter(X[:, 0], X[:, 1], c=gray_colors, s=60, marker='o',\n",
-    "            edgecolors='k', linewidths=0.5)\n",
+    "# Treated group\n",
+    "treated_idx = W == 1\n",
+    "ax3.scatter(X[treated_idx, 0], X[treated_idx, 1], marker='o', s=100, \n",
+    "           c=y_norm[treated_idx], cmap='gray', vmin=0, vmax=1,\n",
+    "           edgecolors='black', linewidths=1)\n",
+    "ax3.set_xlabel('X1', fontsize=12)\n",
+    "ax3.set_ylabel('X2', fontsize=12)\n",
+    "ax3.set_title('Treated', fontsize=14)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Third plot: Policy regions\n",
+    "fig3, ax4 = plt.subplots(1, 1, figsize=(8, 6))\n",
     "\n",
-    "import matplotlib.pyplot as plt\n",
-    "from matplotlib.patches import Rectangle\n",
+    "# Define colors with transparency\n",
+    "col1 = (0.9960938, 0.7539062, 0.0273438, 0.35)  # Yellow-ish\n",
+    "col2 = (0.250980, 0.690196, 0.650980, 0.35)     # Teal-ish\n",
     "\n",
-    "plt.figure(figsize=(6, 5))\n",
+    "# Draw policy regions\n",
+    "rect1 = Rectangle((-0.1, -0.1), 0.6, 1.2, linewidth=0, \n",
+    "                  edgecolor='none', facecolor=col1, hatch='///')\n",
+    "rect2 = Rectangle((0.5, -0.1), 0.6, 0.6, linewidth=0, \n",
+    "                  edgecolor='none', facecolor=col1, hatch='///')\n",
+    "rect3 = Rectangle((0.5, 0.5), 0.6, 0.6, linewidth=0, \n",
+    "                  edgecolor='none', facecolor=col2, hatch='///')\n",
+    "ax4.add_patch(rect1)\n",
+    "ax4.add_patch(rect2)\n",
+    "ax4.add_patch(rect3)\n",
     "\n",
-    "# 1) 구역 칠하기 (사각형 3개)\n",
-    "col_treat = (0.25, 0.69, 0.65, 0.35)   # 초록 투명\n",
-    "col_notreat = (0.996, 0.754, 0.027, 0.35)  # 노랑 투명\n",
+    "# Plot data points\n",
+    "for i in range(n):\n",
+    "    if W[i] == 1:\n",
+    "        ax4.scatter(X[i, 0], X[i, 1], marker='o', s=100, \n",
+    "                   c=[y_norm[i]], cmap='gray', vmin=0, vmax=1, \n",
+    "                   edgecolors='black', linewidths=1)\n",
+    "    else:\n",
+    "        ax4.scatter(X[i, 0], X[i, 1], marker='D', s=80, \n",
+    "                   c=[y_norm[i]], cmap='gray', vmin=0, vmax=1,\n",
+    "                   edgecolors='black', linewidths=1)\n",
+    "\n",
+    "# Add text labels\n",
+    "ax4.text(0.75, 0.75, 'TREAT (A)', fontsize=16, ha='center', va='center')\n",
+    "ax4.text(0.25, 0.25, 'DO NOT TREAT (A^C)', fontsize=16, ha='left', va='center')\n",
+    "ax4.set_xlabel('X1', fontsize=12)\n",
+    "ax4.set_ylabel('X2', fontsize=12)\n",
+    "ax4.set_xlim(-0.1, 1.1)\n",
+    "ax4.set_ylim(-0.1, 1.1)\n",
+    "ax4.set_title('Policy Regions', fontsize=14)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Policy Evaluation Methods\n",
+    "print(\"=\" * 60)\n",
+    "print(\"POLICY EVALUATION RESULTS\")\n",
+    "print(\"=\" * 60)\n",
     "\n",
-    "# 왼쪽(0~0.5, 전체 y)\n",
-    "plt.gca().add_patch(Rectangle((-.1, -.1), 0.6, 1.2, facecolor=col_notreat, edgecolor='none', hatch='///'))\n",
-    "# 오른쪽 아래(0.5~1, 0~0.5)\n",
-    "plt.gca().add_patch(Rectangle((0.5, -.1), 0.6, 0.6, facecolor=col_notreat, edgecolor='none', hatch='///'))\n",
-    "# 오른쪽 위(0.5~1, 0.5~1)\n",
-    "plt.gca().add_patch(Rectangle((0.5, 0.5), 0.6, 0.6, facecolor=col_treat, edgecolor='none', hatch='///'))\n",
+    "# Method 1: Value of policy A (only valid in randomized setting)\n",
+    "A = (X[:, 0] > 0.5) & (X[:, 1] > 0.5)\n",
+    "value_estimate = np.mean(Y[A & (W == 1)]) * np.mean(A) + \\\n",
+    "                 np.mean(Y[~A & (W == 0)]) * np.mean(~A)\n",
+    "value_stderr = np.sqrt(\n",
+    "    np.var(Y[A & (W == 1)]) / np.sum(A & (W == 1)) * np.mean(A)**2 + \n",
+    "    np.var(Y[~A & (W == 0)]) / np.sum(~A & (W == 0)) * np.mean(~A)**2\n",
+    ")\n",
+    "print(f\"\\nMethod 1: Value of Policy A\")\n",
+    "print(f\"Value estimate: {value_estimate:.6f}\")\n",
+    "print(f\"Std. Error: {value_stderr:.6f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Method 2: Value of fixed treatment proportion (p=0.75)\n",
+    "p_treat = 0.75\n",
+    "value_estimate2 = p_treat * np.mean(Y[W == 1]) + (1 - p_treat) * np.mean(Y[W == 0])\n",
+    "value_stderr2 = np.sqrt(\n",
+    "    np.var(Y[W == 1]) / np.sum(W == 1) * p_treat**2 + \n",
+    "    np.var(Y[W == 0]) / np.sum(W == 0) * (1 - p_treat)**2\n",
+    ")\n",
+    "print(f\"\\nMethod 2: Value of Fixed Treatment Proportion (p={p_treat})\")\n",
+    "print(f\"Value estimate: {value_estimate2:.6f}\")\n",
+    "print(f\"Std. Error: {value_stderr2:.6f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Method 3: Treatment effect within policy region A\n",
+    "diff_estimate = (np.mean(Y[A & (W == 1)]) - np.mean(Y[A & (W == 0)])) * np.mean(A)\n",
+    "diff_stderr = np.sqrt(\n",
+    "    np.var(Y[A & (W == 1)]) / np.sum(A & (W == 1)) + \n",
+    "    np.var(Y[A & (W == 0)]) / np.sum(A & (W == 0))\n",
+    ") * np.mean(A)\n",
+    "print(f\"\\nMethod 3: Treatment Effect within Policy Region A\")\n",
+    "print(f\"Difference estimate: {diff_estimate:.6f}\")\n",
+    "print(f\"Std. Error: {diff_stderr:.6f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Method 4: Optimal policy difference\n",
+    "diff_estimate2 = (np.mean(Y[A & (W == 1)]) - np.mean(Y[A & (W == 0)])) * np.mean(A) / 2 + \\\n",
+    "                 (np.mean(Y[~A & (W == 0)]) - np.mean(Y[~A & (W == 1)])) * np.mean(~A) / 2\n",
+    "diff_stderr2 = np.sqrt(\n",
+    "    (np.mean(A) / 2)**2 * (\n",
+    "        np.var(Y[A & (W == 1)]) / np.sum(A & (W == 1)) + \n",
+    "        np.var(Y[A & (W == 0)]) / np.sum(A & (W == 0))\n",
+    "    ) + \n",
+    "    (np.mean(~A) / 2)**2 * (\n",
+    "        np.var(Y[~A & (W == 1)]) / np.sum(~A & (W == 1)) + \n",
+    "        np.var(Y[~A & (W == 0)]) / np.sum(~A & (W == 0))\n",
+    "    )\n",
+    ")\n",
+    "print(f\"\\nMethod 4: Optimal Policy Difference\")\n",
+    "print(f\"Difference estimate: {diff_estimate2:.6f}\")\n",
+    "print(f\"Std. Error: {diff_stderr2:.6f}\")\n",
     "\n",
-    "# 2) 점 찍기\n",
-    "plt.scatter(X[W==0,0], X[W==0,1],\n",
-    "            c=y_norm[W==0], cmap='gray', vmin=0, vmax=1,\n",
-    "            s=60, marker='^', edgecolors='k', linewidths=0.5,\n",
-    "            label=\"Untreated\")\n",
-    "plt.scatter(X[W==1,0], X[W==1,1],\n",
-    "            c=y_norm[W==1], cmap='gray', vmin=0, vmax=1,\n",
-    "            s=60, marker='o', edgecolors='k', linewidths=0.5,\n",
-    "            label=\"Treated\")\n",
+    "print(\"\\n\" + \"=\" * 60)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Additional analysis: Treatment effect heterogeneity\n",
+    "print(\"\\nADDITIONAL ANALYSIS\")\n",
+    "print(\"=\" * 60)\n",
     "\n",
-    "# 3) 텍스트 라벨 붙이기\n",
-    "plt.text(0.75, 0.75, \"TREAT (A)\", fontsize=14, ha='center', va='center')\n",
-    "plt.text(0.25, 0.25, \"DO NOT TREAT (A^C)\", fontsize=12, ha='center', va='center')\n",
+    "# Calculate treatment effects by region\n",
+    "te_in_A = np.mean(Y[A & (W == 1)]) - np.mean(Y[A & (W == 0)])\n",
+    "te_out_A = np.mean(Y[~A & (W == 1)]) - np.mean(Y[~A & (W == 0)])\n",
     "\n",
-    "plt.xlim(-0.1, 1.1)\n",
-    "plt.ylim(-0.1, 1.1)\n",
-    "plt.xlabel(\"X1\"); plt.ylabel(\"X2\")\n",
-    "plt.title(\"Policy Regions with Treated vs Untreated\")\n",
-    "plt.legend()\n",
-    "plt.tight_layout()\n",
-    "plt.show()\n"
+    "print(f\"\\nTreatment Effect Heterogeneity:\")\n",
+    "print(f\"Treatment effect in region A: {te_in_A:.6f}\")\n",
+    "print(f\"Treatment effect outside region A: {te_out_A:.6f}\")\n",
+    "print(f\"Difference in treatment effects: {te_in_A - te_out_A:.6f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Summary statistics\n",
+    "print(f\"\\nSummary Statistics:\")\n",
+    "print(f\"Proportion in region A: {np.mean(A):.3f}\")\n",
+    "print(f\"Proportion treated: {np.mean(W):.3f}\")\n",
+    "print(f\"Mean outcome (treated): {np.mean(Y[W == 1]):.6f}\")\n",
+    "print(f\"Mean outcome (untreated): {np.mean(Y[W == 0]):.6f}\")\n",
+    "print(f\"Overall treatment effect: {np.mean(Y[W == 1]) - np.mean(Y[W == 0]):.6f}\")"
    ]
   }
  ],

From 8f7fb9ac63fcbd08fbc80fc469c00c296e5a4039 Mon Sep 17 00:00:00 2001
From: AIgoGracia <jse0616@gmail.com>
Date: Sat, 4 Oct 2025 23:43:27 +0900
Subject: [PATCH 3/6] policy aipw v3

---
 book/cate_and_policy/policy_learning.ipynb | 218 +++++++++++++++++++++
 1 file changed, 218 insertions(+)

diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
index f6c07bc..9b3cb67 100644
--- a/book/cate_and_policy/policy_learning.ipynb
+++ b/book/cate_and_policy/policy_learning.ipynb
@@ -282,6 +282,224 @@
     "print(f\"Mean outcome (untreated): {np.mean(Y[W == 0]):.6f}\")\n",
     "print(f\"Overall treatment effect: {np.mean(Y[W == 1]) - np.mean(Y[W == 0]):.6f}\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate observational data\n",
+    "np.random.seed(123)\n",
+    "n = 1000\n",
+    "p = 4\n",
+    "X = np.random.uniform(0, 1, (n, p))\n",
+    "e = 1 / (1 + np.exp(-2*(X[:, 0] - 0.5) - 2*(X[:, 1] - 0.5)))  # not observed by analyst\n",
+    "W = np.random.binomial(1, e, n)\n",
+    "Y = 0.5 * (X[:, 0] - 0.5) + (X[:, 1] - 0.5) * W + 0.1 * np.random.randn(n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_norm = (Y - Y.min()) / (Y.max() - Y.min())\n",
+    "\n",
+    "# Plot by treatment status\n",
+    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n",
+    "\n",
+    "# Untreated\n",
+    "untreated_idx = W == 0\n",
+    "for i in np.where(untreated_idx)[0]:\n",
+    "    ax1.scatter(X[i, 0], X[i, 1], marker='D', s=80, \n",
+    "               c=[y_norm[i]], cmap='gray', vmin=0, vmax=1,\n",
+    "               edgecolors='black', linewidths=1)\n",
+    "ax1.set_xlabel('X1')\n",
+    "ax1.set_ylabel('X2')\n",
+    "ax1.set_title('Untreated')\n",
+    "\n",
+    "# Treated\n",
+    "treated_idx = W == 1\n",
+    "for i in np.where(treated_idx)[0]:\n",
+    "    ax2.scatter(X[i, 0], X[i, 1], marker='o', s=100, \n",
+    "               c=[y_norm[i]], cmap='gray', vmin=0, vmax=1,\n",
+    "               edgecolors='black', linewidths=1)\n",
+    "ax2.set_xlabel('X1')\n",
+    "ax2.set_ylabel('X2')\n",
+    "ax2.set_title('Treated')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
+    "from sklearn.model_selection import KFold"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CausalForest:\n",
+    "    \"\"\"\n",
+    "    Simplified Causal Forest implementation to match grf package behavior\n",
+    "    \"\"\"\n",
+    "    def __init__(self, n_estimators=2000, max_features='sqrt', min_samples_leaf=5, \n",
+    "                 honest=True, W_hat=None):\n",
+    "        self.n_estimators = n_estimators\n",
+    "        self.max_features = max_features\n",
+    "        self.min_samples_leaf = min_samples_leaf\n",
+    "        self.honest = honest\n",
+    "        self.W_hat_fixed = W_hat\n",
+    "        \n",
+    "    def fit(self, X, Y, W):\n",
+    "        self.X = X\n",
+    "        self.Y = Y\n",
+    "        self.W = W\n",
+    "        n = len(Y)\n",
+    "        \n",
+    "        # If W.hat is provided (randomized setting), use it\n",
+    "        if self.W_hat_fixed is not None:\n",
+    "            self.W_hat = np.full(n, self.W_hat_fixed)\n",
+    "        else:\n",
+    "            # Estimate propensity score\n",
+    "            ps_model = RandomForestClassifier(\n",
+    "                n_estimators=self.n_estimators//2,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42\n",
+    "            )\n",
+    "            ps_model.fit(X, W)\n",
+    "            self.W_hat = ps_model.predict_proba(X)[:, 1]\n",
+    "            # Clip to avoid division issues\n",
+    "            self.W_hat = np.clip(self.W_hat, 0.01, 0.99)\n",
+    "        \n",
+    "        # Estimate outcome model\n",
+    "        outcome_model = RandomForestRegressor(\n",
+    "            n_estimators=self.n_estimators//2,\n",
+    "            max_features=self.max_features,\n",
+    "            min_samples_leaf=self.min_samples_leaf,\n",
+    "            random_state=42\n",
+    "        )\n",
+    "        outcome_model.fit(X, Y)\n",
+    "        self.Y_hat = outcome_model.predict(X)\n",
+    "        \n",
+    "        # Estimate treatment effects using T-learner\n",
+    "        # Model for treated\n",
+    "        model_1 = RandomForestRegressor(\n",
+    "            n_estimators=self.n_estimators//2,\n",
+    "            max_features=self.max_features,\n",
+    "            min_samples_leaf=self.min_samples_leaf,\n",
+    "            random_state=42\n",
+    "        )\n",
+    "        if np.sum(W == 1) > 0:\n",
+    "            model_1.fit(X[W == 1], Y[W == 1])\n",
+    "            self.mu_1 = model_1.predict(X)\n",
+    "        else:\n",
+    "            self.mu_1 = np.zeros(n)\n",
+    "        \n",
+    "        # Model for control\n",
+    "        model_0 = RandomForestRegressor(\n",
+    "            n_estimators=self.n_estimators//2,\n",
+    "            max_features=self.max_features,\n",
+    "            min_samples_leaf=self.min_samples_leaf,\n",
+    "            random_state=42\n",
+    "        )\n",
+    "        if np.sum(W == 0) > 0:\n",
+    "            model_0.fit(X[W == 0], Y[W == 0])\n",
+    "            self.mu_0 = model_0.predict(X)\n",
+    "        else:\n",
+    "            self.mu_0 = np.zeros(n)\n",
+    "        \n",
+    "        # Treatment effect\n",
+    "        self.tau_hat = self.mu_1 - self.mu_0\n",
+    "        \n",
+    "        return self\n",
+    "    \n",
+    "    def predict(self):\n",
+    "        return {'predictions': self.tau_hat}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fit causal forest\n",
+    "print(\"\\nFitting causal forest...\")\n",
+    "forest = CausalForest()\n",
+    "forest.fit(X, Y, W)\n",
+    "\n",
+    "# Get predictions\n",
+    "tau_hat = forest.predict()['predictions']\n",
+    "\n",
+    "# Estimate outcome models\n",
+    "mu_hat_1 = forest.Y_hat + (1 - forest.W_hat) * tau_hat\n",
+    "mu_hat_0 = forest.Y_hat - forest.W_hat * tau_hat\n",
+    "\n",
+    "# Compute AIPW scores\n",
+    "gamma_hat_1 = mu_hat_1 + W/forest.W_hat * (Y - mu_hat_1)\n",
+    "gamma_hat_0 = mu_hat_0 + (1-W)/(1-forest.W_hat) * (Y - mu_hat_0)\n",
+    "\n",
+    "print(\"Causal forest fitted successfully.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# POLICY EVALUATION WITH AIPW\n",
+    "print(\"\\n--- Policy A (X1 > 0.5 & X2 > 0.5) with AIPW ---\")\n",
+    "pi = (X[:, 0] > 0.5) & (X[:, 1] > 0.5)\n",
+    "gamma_hat_pi = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0\n",
+    "value_estimate = np.mean(gamma_hat_pi)\n",
+    "value_stderr = np.std(gamma_hat_pi) / np.sqrt(len(gamma_hat_pi))\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "\n",
+    "print(\"\\n--- Random Policy (p=0.75) with AIPW ---\")\n",
+    "pi_random = 0.75\n",
+    "gamma_hat_pi = pi_random * gamma_hat_1 + (1 - pi_random) * gamma_hat_0\n",
+    "value_estimate = np.mean(gamma_hat_pi)\n",
+    "value_stderr = np.std(gamma_hat_pi) / np.sqrt(len(gamma_hat_pi))\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "print(\"\\n--- Difference: Policy A vs Never Treat ---\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# AIPW scores for Policy A\n",
+    "pi = (X[:, 0] > 0.5) & (X[:, 1] > 0.5)\n",
+    "gamma_hat_pi = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0\n",
+    "\n",
+    "# AIPW scores for Never Treat\n",
+    "pi_never = 0\n",
+    "gamma_hat_pi_never = pi_never * gamma_hat_1 + (1 - pi_never) * gamma_hat_0\n",
+    "\n",
+    "# Difference\n",
+    "diff_scores = gamma_hat_pi - gamma_hat_pi_never\n",
+    "diff_estimate = np.mean(diff_scores)\n",
+    "diff_stderr = np.std(diff_scores) / np.sqrt(len(diff_scores))\n",
+    "print(f\"diff estimate: {diff_estimate:.10f} Std. Error: {diff_stderr:.10f}\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 70) \n",
+    "print(\"ANALYSIS COMPLETE\")\n",
+    "print(\"=\" * 70)"
+   ]
   }
  ],
  "metadata": {

From 9f69e18a26d4e1dd99b9c22fd3e6fb5a4f08fba5 Mon Sep 17 00:00:00 2001
From: AIgoGracia <jse0616@gmail.com>
Date: Sun, 12 Oct 2025 13:06:41 +0900
Subject: [PATCH 4/6] case study v1

---
 book/cate_and_policy/policy_learning.ipynb | 112 ++++++++++++++++++++-
 1 file changed, 110 insertions(+), 2 deletions(-)

diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
index 9b3cb67..7e4749a 100644
--- a/book/cate_and_policy/policy_learning.ipynb
+++ b/book/cate_and_policy/policy_learning.ipynb
@@ -500,11 +500,119 @@
     "print(\"ANALYSIS COMPLETE\")\n",
     "print(\"=\" * 70)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading data...\n",
+      "Data loaded: 29726 observations\n",
+      "Treatment variable: w\n",
+      "Outcome variable: y\n",
+      "Covariates: age, polviews, income, educ, marital, sex\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Loading data...\")\n",
+    "# Read in data\n",
+    "url = \"https://docs.google.com/uc?id=1AQva5-vDlgBcM_Tv9yrO8yMYRfQJgqo_&export=download\"\n",
+    "data = pd.read_csv(url)\n",
+    "n = len(data)\n",
+    "\n",
+    "# NOTE: We'll invert treatment and control, compared to previous chapters\n",
+    "data['w'] = 1 - data['w']\n",
+    "\n",
+    "# Treatment is the wording of the question:\n",
+    "# 'does the gov't spend too much on 'assistance to the poor' (control: 0)\n",
+    "# 'does the gov't spend too much on \"welfare\"?' (treatment: 1)\n",
+    "treatment = 'w'\n",
+    "\n",
+    "# Outcome: 1 for 'yes', 0 for 'no'\n",
+    "outcome = 'y'\n",
+    "\n",
+    "# Additional covariates\n",
+    "covariates = ['age', 'polviews', 'income', 'educ', 'marital', 'sex']\n",
+    "\n",
+    "print(f\"Data loaded: {n} observations\")\n",
+    "print(f\"Treatment variable: {treatment}\")\n",
+    "print(f\"Outcome variable: {outcome}\")\n",
+    "print(f\"Covariates: {', '.join(covariates)}\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "METHOD 1: Simple Mean-Based Estimation\n",
+      "(Only valid in randomized setting)\n",
+      "======================================================================\n",
+      "\n",
+      "Policy: Treat if polviews <= 4 OR age > 50\n",
+      "Proportion treated under policy: 0.773\n",
+      "Value estimate: 0.3457179812 Std. Error: 0.0038947280\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"=\" * 70)\n",
+    "print(\"METHOD 1: Simple Mean-Based Estimation\")\n",
+    "print(\"(Only valid in randomized setting)\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# Extract variables\n",
+    "X = data[covariates].values\n",
+    "Y = data[outcome].values\n",
+    "W = data[treatment].values\n",
+    "\n",
+    "# Define policy: treat if polviews <= 4 (liberal/moderate) OR age > 50\n",
+    "pi = (data['polviews'].values <= 4) | (data['age'].values > 50)\n",
+    "A = pi == 1\n",
+    "\n",
+    "# Calculate value estimate\n",
+    "value_estimate = np.mean(Y[A & (W==1)]) * np.mean(A) + \\\n",
+    "                 np.mean(Y[~A & (W==0)]) * np.mean(~A)\n",
+    "\n",
+    "# Calculate standard error\n",
+    "value_stderr = np.sqrt(\n",
+    "    np.var(Y[A & (W==1)]) / np.sum(A & (W==1)) * np.mean(A)**2 + \n",
+    "    np.var(Y[~A & (W==0)]) / np.sum(~A & (W==0)) * np.mean(~A)**2\n",
+    ")\n",
+    "\n",
+    "print(f\"\\nPolicy: Treat if polviews <= 4 OR age > 50\")\n",
+    "print(f\"Proportion treated under policy: {np.mean(A):.3f}\")\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "base",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -518,7 +626,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

From 17ec9edfa03321cdfb102e287a5b9ba226a8fb29 Mon Sep 17 00:00:00 2001
From: AIgoGracia <jse0616@gmail.com>
Date: Sun, 19 Oct 2025 00:38:06 +0900
Subject: [PATCH 5/6] case study v2

---
 book/cate_and_policy/policy_learning.ipynb | 405 +++++++++++++++++++--
 1 file changed, 381 insertions(+), 24 deletions(-)

diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
index 7e4749a..0b5b463 100644
--- a/book/cate_and_policy/policy_learning.ipynb
+++ b/book/cate_and_policy/policy_learning.ipynb
@@ -505,12 +505,31 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "WELFARE SURVEY POLICY EVALUATION\n",
+      "======================================================================\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "import pandas as pd\n",
     "import warnings\n",
-    "warnings.filterwarnings('ignore')"
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "# Set random seed for reproducibility\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"WELFARE SURVEY POLICY EVALUATION\")\n",
+    "print(\"=\" * 70)\n",
+    "print()"
    ]
   },
   {
@@ -524,14 +543,15 @@
      "text": [
       "Loading data...\n",
       "Data loaded: 29726 observations\n",
-      "Treatment variable: w\n",
-      "Outcome variable: y\n",
-      "Covariates: age, polviews, income, educ, marital, sex\n",
       "\n"
      ]
     }
    ],
    "source": [
+    "# ==============================================================================\n",
+    "# STEP 1: LOAD AND PREPARE DATA\n",
+    "# ==============================================================================\n",
+    "\n",
     "print(\"Loading data...\")\n",
     "# Read in data\n",
     "url = \"https://docs.google.com/uc?id=1AQva5-vDlgBcM_Tv9yrO8yMYRfQJgqo_&export=download\"\n",
@@ -553,15 +573,12 @@
     "covariates = ['age', 'polviews', 'income', 'educ', 'marital', 'sex']\n",
     "\n",
     "print(f\"Data loaded: {n} observations\")\n",
-    "print(f\"Treatment variable: {treatment}\")\n",
-    "print(f\"Outcome variable: {outcome}\")\n",
-    "print(f\"Covariates: {', '.join(covariates)}\")\n",
     "print()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -569,30 +586,30 @@
      "output_type": "stream",
      "text": [
       "======================================================================\n",
-      "METHOD 1: Simple Mean-Based Estimation\n",
-      "(Only valid in randomized setting)\n",
+      "SIMPLE MEAN-BASED ESTIMATION (RCT only)\n",
       "======================================================================\n",
-      "\n",
-      "Policy: Treat if polviews <= 4 OR age > 50\n",
-      "Proportion treated under policy: 0.773\n",
-      "Value estimate: 0.3457179812 Std. Error: 0.0038947280\n"
+      "Value estimate: 0.3457179812 Std. Error: 0.0038947280\n",
+      "\n"
      ]
     }
    ],
    "source": [
+    "# ==============================================================================\n",
+    "# STEP 2: SIMPLE MEAN-BASED ESTIMATION (Only valid in randomized setting)\n",
+    "# ==============================================================================\n",
+    "\n",
     "print(\"=\" * 70)\n",
-    "print(\"METHOD 1: Simple Mean-Based Estimation\")\n",
-    "print(\"(Only valid in randomized setting)\")\n",
+    "print(\"SIMPLE MEAN-BASED ESTIMATION (RCT only)\")\n",
     "print(\"=\" * 70)\n",
     "\n",
     "# Extract variables\n",
-    "X = data[covariates].values\n",
+    "X = data[covariates]\n",
     "Y = data[outcome].values\n",
     "W = data[treatment].values\n",
     "\n",
-    "# Define policy: treat if polviews <= 4 (liberal/moderate) OR age > 50\n",
-    "pi = (data['polviews'].values <= 4) | (data['age'].values > 50)\n",
-    "A = pi == 1\n",
+    "# Define policy: treat if polviews <= 4 OR age > 50\n",
+    "pi = (X['polviews'] <= 4) | (X['age'] > 50)\n",
+    "A = pi.values == 1\n",
     "\n",
     "# Calculate value estimate\n",
     "value_estimate = np.mean(Y[A & (W==1)]) * np.mean(A) + \\\n",
@@ -604,9 +621,349 @@
     "    np.var(Y[~A & (W==0)]) / np.sum(~A & (W==0)) * np.mean(~A)**2\n",
     ")\n",
     "\n",
-    "print(f\"\\nPolicy: Treat if polviews <= 4 OR age > 50\")\n",
-    "print(f\"Proportion treated under policy: {np.mean(A):.3f}\")\n",
-    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")"
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "CAUSAL FOREST WITH AIPW\n",
+      "======================================================================\n",
+      "Using scikit-learn for Random Forest\n",
+      "\n",
+      "Fitting causal forest (randomized setting with W.hat=0.5)...\n",
+      "Causal forest fitted successfully.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 3: CAUSAL FOREST WITH AIPW\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"CAUSAL FOREST WITH AIPW\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# Create model matrix (design matrix with intercept)\n",
+    "# This mimics R's model.matrix() function\n",
+    "X_design = pd.get_dummies(data[covariates], drop_first=False)\n",
+    "# Add intercept\n",
+    "X_design.insert(0, 'intercept', 1)\n",
+    "X_design = X_design.values\n",
+    "\n",
+    "Y = data[outcome].values\n",
+    "W = data[treatment].values\n",
+    "\n",
+    "# Try to use sklearn if available\n",
+    "try:\n",
+    "    from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
+    "    use_sklearn = True\n",
+    "    print(\"Using scikit-learn for Random Forest\")\n",
+    "except ImportError:\n",
+    "    use_sklearn = False\n",
+    "    print(\"scikit-learn not found. Using simplified implementation.\")\n",
+    "    print(\"For exact replication of R results, install scikit-learn: pip install scikit-learn\")\n",
+    "\n",
+    "# Causal Forest Implementation\n",
+    "if use_sklearn:\n",
+    "    class CausalForest:\n",
+    "        \"\"\"Causal Forest implementation matching grf package behavior\"\"\"\n",
+    "        \n",
+    "        def __init__(self, n_estimators=2000, max_features=None, min_samples_leaf=5, \n",
+    "                     W_hat=None, honest=True):\n",
+    "            self.n_estimators = n_estimators\n",
+    "            # Match grf default: sqrt of number of features\n",
+    "            self.max_features = max_features if max_features else 'sqrt'\n",
+    "            self.min_samples_leaf = min_samples_leaf\n",
+    "            self.W_hat_fixed = W_hat\n",
+    "            self.honest = honest\n",
+    "            \n",
+    "        def fit(self, X, Y, W):\n",
+    "            n = len(Y)\n",
+    "            \n",
+    "            # If W.hat is provided (randomized setting), use it\n",
+    "            if self.W_hat_fixed is not None:\n",
+    "                self.W_hat = np.full(n, self.W_hat_fixed)\n",
+    "            else:\n",
+    "                # Estimate propensity score\n",
+    "                ps_model = RandomForestClassifier(\n",
+    "                    n_estimators=500,\n",
+    "                    max_features=self.max_features,\n",
+    "                    min_samples_leaf=self.min_samples_leaf,\n",
+    "                    random_state=42,\n",
+    "                    n_jobs=-1\n",
+    "                )\n",
+    "                ps_model.fit(X, W)\n",
+    "                self.W_hat = ps_model.predict_proba(X)[:, 1]\n",
+    "                self.W_hat = np.clip(self.W_hat, 0.01, 0.99)\n",
+    "            \n",
+    "            # Estimate outcome model\n",
+    "            outcome_model = RandomForestRegressor(\n",
+    "                n_estimators=500,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42,\n",
+    "                n_jobs=-1\n",
+    "            )\n",
+    "            outcome_model.fit(X, Y)\n",
+    "            self.Y_hat = outcome_model.predict(X)\n",
+    "            \n",
+    "            # T-learner for treatment effects\n",
+    "            model_1 = RandomForestRegressor(\n",
+    "                n_estimators=1000,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42,\n",
+    "                n_jobs=-1\n",
+    "            )\n",
+    "            model_0 = RandomForestRegressor(\n",
+    "                n_estimators=1000,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42,\n",
+    "                n_jobs=-1\n",
+    "            )\n",
+    "            \n",
+    "            # Fit separate models for treated and control\n",
+    "            if np.sum(W == 1) > 0:\n",
+    "                model_1.fit(X[W == 1], Y[W == 1])\n",
+    "                self.mu_1 = model_1.predict(X)\n",
+    "            else:\n",
+    "                self.mu_1 = np.zeros(n)\n",
+    "            \n",
+    "            if np.sum(W == 0) > 0:\n",
+    "                model_0.fit(X[W == 0], Y[W == 0])\n",
+    "                self.mu_0 = model_0.predict(X)\n",
+    "            else:\n",
+    "                self.mu_0 = np.zeros(n)\n",
+    "            \n",
+    "            # Treatment effect\n",
+    "            self.tau_hat = self.mu_1 - self.mu_0\n",
+    "            \n",
+    "            return self\n",
+    "        \n",
+    "        def predict(self):\n",
+    "            return {'predictions': self.tau_hat}\n",
+    "else:\n",
+    "    # Simplified implementation without sklearn\n",
+    "    class CausalForest:\n",
+    "        def __init__(self, n_estimators=100, W_hat=None, **kwargs):\n",
+    "            self.n_estimators = min(n_estimators, 100)  # Limit for speed\n",
+    "            self.W_hat_fixed = W_hat\n",
+    "            \n",
+    "        def fit(self, X, Y, W):\n",
+    "            n = len(Y)\n",
+    "            \n",
+    "            # Fixed propensity score for RCT\n",
+    "            if self.W_hat_fixed is not None:\n",
+    "                self.W_hat = np.full(n, self.W_hat_fixed)\n",
+    "            else:\n",
+    "                # Simple estimate: empirical proportion\n",
+    "                self.W_hat = np.full(n, np.mean(W))\n",
+    "            \n",
+    "            # Simple outcome model: just use means\n",
+    "            self.Y_hat = np.full(n, np.mean(Y))\n",
+    "            \n",
+    "            # Simple treatment effect estimation\n",
+    "            if np.sum(W == 1) > 0:\n",
+    "                self.mu_1 = np.full(n, np.mean(Y[W == 1]))\n",
+    "            else:\n",
+    "                self.mu_1 = np.full(n, np.mean(Y))\n",
+    "                \n",
+    "            if np.sum(W == 0) > 0:\n",
+    "                self.mu_0 = np.full(n, np.mean(Y[W == 0]))\n",
+    "            else:\n",
+    "                self.mu_0 = np.full(n, np.mean(Y))\n",
+    "            \n",
+    "            self.tau_hat = self.mu_1 - self.mu_0\n",
+    "            \n",
+    "            return self\n",
+    "        \n",
+    "        def predict(self):\n",
+    "            return {'predictions': self.tau_hat}\n",
+    "\n",
+    "# Estimate a causal forest\n",
+    "# Important: Using W.hat=0.5 for randomized setting\n",
+    "print(\"\\nFitting causal forest (randomized setting with W.hat=0.5)...\")\n",
+    "forest = CausalForest(n_estimators=2000 if use_sklearn else 100, W_hat=0.5)\n",
+    "forest.fit(X_design, Y, W)\n",
+    "\n",
+    "# Get predictions\n",
+    "tau_hat = forest.predict()['predictions']\n",
+    "\n",
+    "# Estimate outcome models for treated and control\n",
+    "mu_hat_1 = forest.Y_hat + (1 - forest.W_hat) * tau_hat  # E[Y|X,W=1]\n",
+    "mu_hat_0 = forest.Y_hat - forest.W_hat * tau_hat  # E[Y|X,W=0]\n",
+    "\n",
+    "# Compute AIPW scores\n",
+    "gamma_hat_1 = mu_hat_1 + W / forest.W_hat * (Y - mu_hat_1)\n",
+    "gamma_hat_0 = mu_hat_0 + (1 - W) / (1 - forest.W_hat) * (Y - mu_hat_0)\n",
+    "\n",
+    "print(\"Causal forest fitted successfully.\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "POLICY EVALUATION WITH AIPW\n",
+      "======================================================================\n",
+      "Value estimate: 0.3376925438 Std. Error: 0.0034256847\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 4: POLICY EVALUATION WITH AIPW\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"POLICY EVALUATION WITH AIPW\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# Use the same policy as before\n",
+    "pi = (data['polviews'] <= 4) | (data['age'] > 50)\n",
+    "pi = pi.values\n",
+    "\n",
+    "# Calculate AIPW score for the policy\n",
+    "gamma_hat_pi = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0\n",
+    "value_estimate = np.mean(gamma_hat_pi)\n",
+    "value_stderr = np.std(gamma_hat_pi) / np.sqrt(len(gamma_hat_pi))\n",
+    "\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "print()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "POLICY COMPARISON\n",
+      "======================================================================\n",
+      "Difference estimate: 0.0721973740 Std. Error: 0.0022069738\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 5: POLICY COMPARISON\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"POLICY COMPARISON\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# Compare with 50% random treatment policy\n",
+    "pi_2 = 0.5\n",
+    "\n",
+    "# AIPW scores for each policy\n",
+    "gamma_hat_pi_1 = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0  # Original policy\n",
+    "gamma_hat_pi_2 = pi_2 * gamma_hat_1 + (1 - pi_2) * gamma_hat_0  # 50% random\n",
+    "\n",
+    "# Difference\n",
+    "gamma_hat_pi_diff = gamma_hat_pi_1 - gamma_hat_pi_2\n",
+    "diff_estimate = np.mean(gamma_hat_pi_diff)\n",
+    "diff_stderr = np.std(gamma_hat_pi_diff) / np.sqrt(len(gamma_hat_pi_diff))\n",
+    "\n",
+    "print(f\"Difference estimate: {diff_estimate:.10f} Std. Error: {diff_stderr:.10f}\")\n",
+    "print()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "======================================================================\n",
+      "ADDITIONAL INFORMATION\n",
+      "======================================================================\n",
+      "\n",
+      "Sample size: 29726\n",
+      "Treatment rate: 0.465\n",
+      "Outcome rate (overall): 0.253\n",
+      "Outcome rate (treated): 0.438\n",
+      "Outcome rate (control): 0.092\n",
+      "\n",
+      "Policy characteristics:\n",
+      "Proportion assigned to treatment by policy: 0.773\n",
+      "Number assigned to treatment: 22988\n",
+      "Number assigned to control: 6738\n",
+      "\n",
+      "Treatment effects by policy group:\n",
+      "Treatment effect in policy group: 0.3279\n",
+      "Treatment effect outside policy group: 0.4069\n",
+      "Overall treatment effect: 0.3460\n",
+      "\n",
+      "======================================================================\n",
+      "ANALYSIS COMPLETE\n",
+      "======================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "# ==============================================================================\n",
+    "# ADDITIONAL SUMMARY STATISTICS\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"ADDITIONAL INFORMATION\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "print(f\"\\nSample size: {n}\")\n",
+    "print(f\"Treatment rate: {np.mean(W):.3f}\")\n",
+    "print(f\"Outcome rate (overall): {np.mean(Y):.3f}\")\n",
+    "print(f\"Outcome rate (treated): {np.mean(Y[W==1]):.3f}\")\n",
+    "print(f\"Outcome rate (control): {np.mean(Y[W==0]):.3f}\")\n",
+    "\n",
+    "print(f\"\\nPolicy characteristics:\")\n",
+    "print(f\"Proportion assigned to treatment by policy: {np.mean(pi):.3f}\")\n",
+    "print(f\"Number assigned to treatment: {np.sum(pi)}\")\n",
+    "print(f\"Number assigned to control: {np.sum(~pi)}\")\n",
+    "\n",
+    "# Check treatment effect heterogeneity\n",
+    "print(f\"\\nTreatment effects by policy group:\")\n",
+    "if np.sum(pi & (W==1)) > 0 and np.sum(pi & (W==0)) > 0:\n",
+    "    te_policy = np.mean(Y[pi & (W==1)]) - np.mean(Y[pi & (W==0)])\n",
+    "    print(f\"Treatment effect in policy group: {te_policy:.4f}\")\n",
+    "if np.sum(~pi & (W==1)) > 0 and np.sum(~pi & (W==0)) > 0:\n",
+    "    te_no_policy = np.mean(Y[~pi & (W==1)]) - np.mean(Y[~pi & (W==0)])\n",
+    "    print(f\"Treatment effect outside policy group: {te_no_policy:.4f}\")\n",
+    "\n",
+    "overall_te = np.mean(Y[W==1]) - np.mean(Y[W==0])\n",
+    "print(f\"Overall treatment effect: {overall_te:.4f}\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 70)\n",
+    "print(\"ANALYSIS COMPLETE\")\n",
+    "print(\"=\" * 70)"
    ]
   }
  ],

From c4fa2ef5444759ae2f09b17d1a32d7f7328d8c26 Mon Sep 17 00:00:00 2001
From: AIgoGracia <jse0616@gmail.com>
Date: Sun, 26 Oct 2025 23:39:54 +0900
Subject: [PATCH 6/6] case study v3

---
 book/cate_and_policy/policy_learning.ipynb | 384 +++++++++++++++++++++
 1 file changed, 384 insertions(+)

diff --git a/book/cate_and_policy/policy_learning.ipynb b/book/cate_and_policy/policy_learning.ipynb
index 0b5b463..49dc57f 100644
--- a/book/cate_and_policy/policy_learning.ipynb
+++ b/book/cate_and_policy/policy_learning.ipynb
@@ -965,6 +965,390 @@
     "print(\"ANALYSIS COMPLETE\")\n",
     "print(\"=\" * 70)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "================================================================================\n",
+    "R vs Python 구현 차이\n",
+    "================================================================================\n",
+    "\n",
+    "1. AIPW 정책 가치 (Value Estimate)\n",
+    "- R 코드 (V2): 0.3459015997 (약 34.6%) -> R의 추정치가 단순 평균에 더 가깝게 나옴\n",
+    "- Python 코드 (V2): 0.3376925438 (약 33.8%)\n",
+    "\n",
+    "2. AIPW 정책 비교 (Difference Estimate)\n",
+    "- R 코드 (V2): 0.0806035592 (약 8.1%p)\n",
+    "- Python 코드 (V2): 0.0721973740 (약 7.2%p)\n",
+    "\n",
+    "차이가 발생하는 이유:\n",
+    "----------------------------\n",
+    "1. 알고리즘 차이\n",
+    "   - R(grf): Honest splitting, debiasing, CATE 전용 트리 알고리즘 사용\n",
+    "   - Python(scikit-learn): 일반 RandomForest 기반, T-learner 사용, debiasing 없음\n",
+    "\n",
+    "2. 패키지 한계\n",
+    "   - grf (R): 인과추론 전용 패키지\n",
+    "   - scikit-learn / econml (Python): 일반 ML 기반, 구현 방식 상이\n",
+    "\n",
+    "================================================================================\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "# Set random seed for reproducibility\n",
+    "np.random.seed(42)\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"FRAMING RCT POLICY EVALUATION\")\n",
+    "print(\"=\" * 70)\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 1: LOAD AND PREPARE DATA\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"Loading data...\")\n",
+    "# Read in data - 파일 경로\n",
+    "data = pd.read_csv(\"C:/Pythwd/data_framing.csv\")  # 실제 파일명\n",
+    "n = len(data)\n",
+    "\n",
+    "# 변수명\n",
+    "treatment = 'group'  # 실제 처치 변수 컬럼명\n",
+    "outcome = 'wta'      # 실제 결과 변수 컬럼명\n",
+    "\n",
+    "# 공변량 리스트\n",
+    "covariates = ['gender', 'age', 'income', 'eco', 'norm', 'edu', 'family']  # 실제 컬럼명\n",
+    "\n",
+    "print(f\"Data loaded: {n} observations\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 2: SIMPLE MEAN-BASED ESTIMATION (Only valid in randomized setting)\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"SIMPLE MEAN-BASED ESTIMATION (RCT only)\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# Extract variables\n",
+    "X = data[covariates]\n",
+    "Y = data[outcome].values\n",
+    "W = data[treatment].values\n",
+    "\n",
+    "# 정책 정의 변경 (Loss Framing을 적용할 대상)\n",
+    "# 나이가 40 이상 AND 가족 수가 3 이상인 사람에게 Loss Framing 적용\n",
+    "pi = (data['age'] >= 40) & (data['family'] >= 3)\n",
+    "A = pi.values == 1\n",
+    "\n",
+    "# Calculate value estimate\n",
+    "value_estimate = np.mean(Y[A & (W==1)]) * np.mean(A) + \\\n",
+    "                 np.mean(Y[~A & (W==0)]) * np.mean(~A)\n",
+    "\n",
+    "# Calculate standard error\n",
+    "value_stderr = np.sqrt(\n",
+    "    np.var(Y[A & (W==1)]) / np.sum(A & (W==1)) * np.mean(A)**2 + \n",
+    "    np.var(Y[~A & (W==0)]) / np.sum(~A & (W==0)) * np.mean(~A)**2\n",
+    ")\n",
+    "\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 3: CAUSAL FOREST WITH AIPW\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"CAUSAL FOREST WITH AIPW\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# Create model matrix (design matrix with intercept)\n",
+    "X_design = pd.get_dummies(data[covariates], drop_first=False)\n",
+    "# Add intercept\n",
+    "X_design.insert(0, 'intercept', 1)\n",
+    "X_design = X_design.values\n",
+    "\n",
+    "Y = data[outcome].values\n",
+    "W = data[treatment].values\n",
+    "\n",
+    "# Try to use sklearn if available\n",
+    "try:\n",
+    "    from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
+    "    use_sklearn = True\n",
+    "    print(\"Using scikit-learn for Random Forest\")\n",
+    "except ImportError:\n",
+    "    use_sklearn = False\n",
+    "    print(\"scikit-learn not found. Using simplified implementation.\")\n",
+    "    print(\"For exact replication of R results, install scikit-learn: pip install scikit-learn\")\n",
+    "\n",
+    "# Causal Forest Implementation\n",
+    "if use_sklearn:\n",
+    "    class CausalForest:\n",
+    "        \"\"\"Causal Forest implementation matching grf package behavior\"\"\"\n",
+    "        \n",
+    "        def __init__(self, n_estimators=2000, max_features=None, min_samples_leaf=5, \n",
+    "                     W_hat=None, honest=True):\n",
+    "            self.n_estimators = n_estimators\n",
+    "            self.max_features = max_features if max_features else 'sqrt'\n",
+    "            self.min_samples_leaf = min_samples_leaf\n",
+    "            self.W_hat_fixed = W_hat\n",
+    "            self.honest = honest\n",
+    "            \n",
+    "        def fit(self, X, Y, W):\n",
+    "            n = len(Y)\n",
+    "            \n",
+    "            # If W.hat is provided (randomized setting), use it\n",
+    "            if self.W_hat_fixed is not None:\n",
+    "                self.W_hat = np.full(n, self.W_hat_fixed)\n",
+    "            else:\n",
+    "                # Estimate propensity score\n",
+    "                ps_model = RandomForestClassifier(\n",
+    "                    n_estimators=500,\n",
+    "                    max_features=self.max_features,\n",
+    "                    min_samples_leaf=self.min_samples_leaf,\n",
+    "                    random_state=42,\n",
+    "                    n_jobs=-1\n",
+    "                )\n",
+    "                ps_model.fit(X, W)\n",
+    "                self.W_hat = ps_model.predict_proba(X)[:, 1]\n",
+    "                self.W_hat = np.clip(self.W_hat, 0.01, 0.99)\n",
+    "            \n",
+    "            # Estimate outcome model\n",
+    "            outcome_model = RandomForestRegressor(\n",
+    "                n_estimators=500,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42,\n",
+    "                n_jobs=-1\n",
+    "            )\n",
+    "            outcome_model.fit(X, Y)\n",
+    "            self.Y_hat = outcome_model.predict(X)\n",
+    "            \n",
+    "            # T-learner for treatment effects\n",
+    "            model_1 = RandomForestRegressor(\n",
+    "                n_estimators=1000,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42,\n",
+    "                n_jobs=-1\n",
+    "            )\n",
+    "            model_0 = RandomForestRegressor(\n",
+    "                n_estimators=1000,\n",
+    "                max_features=self.max_features,\n",
+    "                min_samples_leaf=self.min_samples_leaf,\n",
+    "                random_state=42,\n",
+    "                n_jobs=-1\n",
+    "            )\n",
+    "            \n",
+    "            # Fit separate models for treated and control\n",
+    "            if np.sum(W == 1) > 0:\n",
+    "                model_1.fit(X[W == 1], Y[W == 1])\n",
+    "                self.mu_1 = model_1.predict(X)\n",
+    "            else:\n",
+    "                self.mu_1 = np.zeros(n)\n",
+    "            \n",
+    "            if np.sum(W == 0) > 0:\n",
+    "                model_0.fit(X[W == 0], Y[W == 0])\n",
+    "                self.mu_0 = model_0.predict(X)\n",
+    "            else:\n",
+    "                self.mu_0 = np.zeros(n)\n",
+    "            \n",
+    "            # Treatment effect\n",
+    "            self.tau_hat = self.mu_1 - self.mu_0\n",
+    "            \n",
+    "            return self\n",
+    "        \n",
+    "        def predict(self):\n",
+    "            return {'predictions': self.tau_hat}\n",
+    "else:\n",
+    "    # Simplified implementation without sklearn\n",
+    "    class CausalForest:\n",
+    "        def __init__(self, n_estimators=100, W_hat=None, **kwargs):\n",
+    "            self.n_estimators = min(n_estimators, 100)\n",
+    "            self.W_hat_fixed = W_hat\n",
+    "            \n",
+    "        def fit(self, X, Y, W):\n",
+    "            n = len(Y)\n",
+    "            \n",
+    "            if self.W_hat_fixed is not None:\n",
+    "                self.W_hat = np.full(n, self.W_hat_fixed)\n",
+    "            else:\n",
+    "                self.W_hat = np.full(n, np.mean(W))\n",
+    "            \n",
+    "            self.Y_hat = np.full(n, np.mean(Y))\n",
+    "            \n",
+    "            if np.sum(W == 1) > 0:\n",
+    "                self.mu_1 = np.full(n, np.mean(Y[W == 1]))\n",
+    "            else:\n",
+    "                self.mu_1 = np.full(n, np.mean(Y))\n",
+    "                \n",
+    "            if np.sum(W == 0) > 0:\n",
+    "                self.mu_0 = np.full(n, np.mean(Y[W == 0]))\n",
+    "            else:\n",
+    "                self.mu_0 = np.full(n, np.mean(Y))\n",
+    "            \n",
+    "            self.tau_hat = self.mu_1 - self.mu_0\n",
+    "            \n",
+    "            return self\n",
+    "        \n",
+    "        def predict(self):\n",
+    "            return {'predictions': self.tau_hat}\n",
+    "\n",
+    "# Estimate a causal forest\n",
+    "print(\"\\nFitting causal forest (randomized setting with W.hat=0.5)...\")\n",
+    "forest = CausalForest(n_estimators=2000 if use_sklearn else 100, W_hat=0.5)\n",
+    "forest.fit(X_design, Y, W)\n",
+    "\n",
+    "# Get predictions\n",
+    "tau_hat = forest.predict()['predictions']\n",
+    "\n",
+    "# Estimate outcome models for treated and control\n",
+    "mu_hat_1 = forest.Y_hat + (1 - forest.W_hat) * tau_hat  # E[Y|X,W=1]\n",
+    "mu_hat_0 = forest.Y_hat - forest.W_hat * tau_hat  # E[Y|X,W=0]\n",
+    "\n",
+    "# Compute AIPW scores\n",
+    "gamma_hat_1 = mu_hat_1 + W / forest.W_hat * (Y - mu_hat_1)\n",
+    "gamma_hat_0 = mu_hat_0 + (1 - W) / (1 - forest.W_hat) * (Y - mu_hat_0)\n",
+    "\n",
+    "print(\"Causal forest fitted successfully.\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 4: POLICY EVALUATION WITH AIPW\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"POLICY EVALUATION WITH AIPW\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# 정책 정의 동일하게 반영\n",
+    "pi = (data['age'] >= 40) & (data['family'] >= 3)\n",
+    "pi = pi.values\n",
+    "\n",
+    "# AIPW value estimation\n",
+    "gamma_hat_pi = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0\n",
+    "value_estimate = np.mean(gamma_hat_pi)\n",
+    "value_stderr = np.std(gamma_hat_pi) / np.sqrt(len(gamma_hat_pi))\n",
+    "\n",
+    "print(f\"Value estimate: {value_estimate:.10f} Std. Error: {value_stderr:.10f}\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 5: POLICY COMPARISON\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"POLICY COMPARISON\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "# 비교 대상 정책: 무작위 50% Loss Framing\n",
+    "pi_2 = 0.5\n",
+    "\n",
+    "# 동일한 정책 정의 사용\n",
+    "pi = (data['age'] >= 40) & (data['family'] >= 3)\n",
+    "pi = pi.values\n",
+    "\n",
+    "gamma_hat_pi_1 = pi * gamma_hat_1 + (1 - pi) * gamma_hat_0  # 정책 기반\n",
+    "gamma_hat_pi_2 = pi_2 * gamma_hat_1 + (1 - pi_2) * gamma_hat_0  # 50% 무작위\n",
+    "\n",
+    "gamma_hat_pi_diff = gamma_hat_pi_1 - gamma_hat_pi_2\n",
+    "diff_estimate = np.mean(gamma_hat_pi_diff)\n",
+    "diff_stderr = np.std(gamma_hat_pi_diff) / np.sqrt(len(gamma_hat_pi_diff))\n",
+    "\n",
+    "print(f\"Difference estimate: {diff_estimate:.10f} Std. Error: {diff_stderr:.10f}\")\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ==============================================================================\n",
+    "# STEP 6: ADDITIONAL SUMMARY STATISTICS\n",
+    "# ==============================================================================\n",
+    "\n",
+    "print(\"=\" * 70)\n",
+    "print(\"ADDITIONAL INFORMATION\")\n",
+    "print(\"=\" * 70)\n",
+    "\n",
+    "print(f\"\\nSample size: {n}\")\n",
+    "print(f\"Treatment rate: {np.mean(W):.3f}\")\n",
+    "print(f\"Outcome rate (overall): {np.mean(Y):.3f}\")\n",
+    "print(f\"Outcome rate (Loss Framing): {np.mean(Y[W==1]):.3f}\")\n",
+    "print(f\"Outcome rate (Gain Framing): {np.mean(Y[W==0]):.3f}\")\n",
+    "\n",
+    "print(f\"\\nPolicy characteristics:\")\n",
+    "print(f\"Proportion assigned to Loss Framing by policy: {np.mean(pi):.3f}\")\n",
+    "print(f\"Number assigned to Loss Framing: {np.sum(pi)}\")\n",
+    "print(f\"Number assigned to Gain Framing: {np.sum(~pi)}\")\n",
+    "\n",
+    "# Framing effect heterogeneity\n",
+    "print(f\"\\nFraming effects by policy group:\")\n",
+    "if np.sum(pi & (W==1)) > 0 and np.sum(pi & (W==0)) > 0:\n",
+    "    te_policy = np.mean(Y[pi & (W==1)]) - np.mean(Y[pi & (W==0)])\n",
+    "    print(f\"Framing effect in Loss-recommended group: {te_policy:.4f}\")\n",
+    "if np.sum(~pi & (W==1)) > 0 and np.sum(~pi & (W==0)) > 0:\n",
+    "    te_no_policy = np.mean(Y[~pi & (W==1)]) - np.mean(Y[~pi & (W==0)])\n",
+    "    print(f\"Framing effect in Gain-recommended group: {te_no_policy:.4f}\")\n",
+    "\n",
+    "overall_te = np.mean(Y[W==1]) - np.mean(Y[W==0])\n",
+    "print(f\"Overall framing effect (Loss - Gain): {overall_te:.4f}\")\n",
+    "\n",
+    "print(\"\\n\" + \"=\" * 70)\n",
+    "print(\"ANALYSIS COMPLETE\")\n",
+    "print(\"=\" * 70)"
+   ]
   }
  ],
  "metadata": {