diff --git "a/Week13_\341\204\207\341\205\251\341\206\250\341\204\211\341\205\263\341\206\270\341\204\200\341\205\252\341\204\214\341\205\246_\341\204\200\341\205\265\341\206\267\341\204\211\341\205\245\341\204\213\341\205\247\341\206\253.ipynb" "b/Week13_\341\204\207\341\205\251\341\206\250\341\204\211\341\205\263\341\206\270\341\204\200\341\205\252\341\204\214\341\205\246_\341\204\200\341\205\265\341\206\267\341\204\211\341\205\245\341\204\213\341\205\247\341\206\253.ipynb"
new file mode 100644
index 0000000..4d72780
--- /dev/null
+++ "b/Week13_\341\204\207\341\205\251\341\206\250\341\204\211\341\205\263\341\206\270\341\204\200\341\205\252\341\204\214\341\205\246_\341\204\200\341\205\265\341\206\267\341\204\211\341\205\245\341\204\213\341\205\247\341\206\253.ipynb"
@@ -0,0 +1,1204 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.linear_model import Ridge ,LogisticRegression\n",
+ "from sklearn.model_selection import train_test_split ,cross_val_score\n",
+ "from sklearn.feature_extraction.text import CountVectorizer ,TfidfVectorizer\n",
+ "import pandas as pd\n",
+ "\n",
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')\n",
+ "# Load the tab-separated Mercari training file from the mounted Drive, then show its shape and first rows.\n",
+ "mercari_df = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/EuronData/mercari_train.tsv', sep='\\t')\n",
+ "print(mercari_df.shape)\n",
+ "mercari_df.head(n=3)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 320
+ },
+ "id": "lUxDYKvHA9eB",
+ "outputId": "3b79d3ea-e56a-4712-cd8c-73c41ee51d93"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(1482535, 8)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " train_id name item_condition_id \\\n",
+ "0 0 MLB Cincinnati Reds T Shirt Size XL 3 \n",
+ "1 1 Razer BlackWidow Chroma Keyboard 3 \n",
+ "2 2 AVA-VIV Blouse 1 \n",
+ "\n",
+ " category_name brand_name price \\\n",
+ "0 Men/Tops/T-shirts NaN 10.0 \n",
+ "1 Electronics/Computers & Tablets/Components & P... Razer 52.0 \n",
+ "2 Women/Tops & Blouses/Blouse Target 10.0 \n",
+ "\n",
+ " shipping item_description \n",
+ "0 1 No description yet \n",
+ "1 0 This keyboard is in great condition and works ... \n",
+ "2 1 Adorable top with a hint of lace and a key hol... "
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " train_id \n",
+ " name \n",
+ " item_condition_id \n",
+ " category_name \n",
+ " brand_name \n",
+ " price \n",
+ " shipping \n",
+ " item_description \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 0 \n",
+ " MLB Cincinnati Reds T Shirt Size XL \n",
+ " 3 \n",
+ " Men/Tops/T-shirts \n",
+ " NaN \n",
+ " 10.0 \n",
+ " 1 \n",
+ " No description yet \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " Razer BlackWidow Chroma Keyboard \n",
+ " 3 \n",
+ " Electronics/Computers & Tablets/Components & P... \n",
+ " Razer \n",
+ " 52.0 \n",
+ " 0 \n",
+ " This keyboard is in great condition and works ... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2 \n",
+ " AVA-VIV Blouse \n",
+ " 1 \n",
+ " Women/Tops & Blouses/Blouse \n",
+ " Target \n",
+ " 10.0 \n",
+ " 1 \n",
+ " Adorable top with a hint of lace and a key hol... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "mercari_df"
+ }
+ },
+ "metadata": {},
+ "execution_count": 2
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "mercari_df.info()  # info() writes its report to stdout and returns None, so print() around it only adds a stray None line"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "JUn0ECTJB5rh",
+ "outputId": "ecab9a4b-b76f-4ca8-a705-0f8cb78e0a5a"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 1482535 entries, 0 to 1482534\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 train_id 1482535 non-null int64 \n",
+ " 1 name 1482535 non-null object \n",
+ " 2 item_condition_id 1482535 non-null int64 \n",
+ " 3 category_name 1476208 non-null object \n",
+ " 4 brand_name 849853 non-null object \n",
+ " 5 price 1482535 non-null float64\n",
+ " 6 shipping 1482535 non-null int64 \n",
+ " 7 item_description 1482529 non-null object \n",
+ "dtypes: float64(1), int64(3), object(4)\n",
+ "memory usage: 90.5+ MB\n",
+ "None\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "# Histogram of the price column (100 bins), drawn on the axes of a 6x4 inch figure.\n",
+ "y_train_df = mercari_df['price']\n",
+ "plt.figure(figsize=(6, 4))\n",
+ "sns.histplot(data=y_train_df, bins=100, ax=plt.gca())\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 388
+ },
+ "id": "s0vdW9LgB5ua",
+ "outputId": "b85099f5-d146-454e-c340-0eaf6880d3c7"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAFzCAYAAAAty0N3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMEBJREFUeJzt3XtYVPXe//8XIDPgYcBDgBQqZnkk8RBEB6stt+imvTPbd2rusiKtbiyVfZu6y1MnTXdaeazdnXrdu9L6XdsOanYZZlYSKokKJplpequASTBaCsh8fn/0ZV2OoAKig6vn47rmupj1ec9a7w9LZl6uWWvGzxhjBAAAYAP+vm4AAACgvhBsAACAbRBsAACAbRBsAACAbRBsAACAbRBsAACAbRBsAACAbRBsAACAbTTydQO/Jx6PR4cOHVKzZs3k5+fn63YAALhsGGN07NgxRUZGyt//7MdlCDaX0KFDhxQVFeXrNgAAuGwdOHBAV1111VnHCTaXULNmzST9tlNcLpePuwEA4PLhdrsVFRVlvZaeDcHmEqp8+8nlchFsAACog/OdysHJwwAAwDYINgAAwDYINgAAwDYINgAAwDYINgAAwDYINgAAwDYINgAAwDYINgAAwDYINgAAwDYINgAAwDb4SoXLXFlZmbKzs72WxcbGyuFw+KYhAAB8iGBzmcvOztao+R8oJLK9JKnk0A+alyrFxcX5uDMAAC49go0NhES2V8voLr5uAwAAn+McGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBsEGwAAYBs+DTYVFRWaNGmSoqOjFRwcrKuvvlrPPvusjDFWjTFGkydPVuvWrRUcHKzExETt3r3baz1FRUUaNmyYXC6XQkNDlZKSouPHj3vVbN++XbfccouCgoIUFRWlmTNnVunnvffeU6dOnRQUFKSYmBitXr3aa7wmvQAAAN/xabB58cUXtXDhQs2bN0/ffvutXnzxRc2cOVNz5861ambOnKlXX31VixYtUmZmppo0aaKkpCSdPHnSqhk2bJhyc3O1du1arVy5Uhs2bNDIkSOtcbfbrX79+qlt27bKysrSrFmzNHXqVL3++utWzcaNGzV06FClpKRo69atGjhwoAYOHKicnJxa9QIAAHzI+FBycrJ56KGHvJYNGjTIDBs2zBhjjMfjMREREWbWrFnWeHFxsXE6neadd94xxhizc+dOI8ls3rzZqvn444+Nn5+fOXjwoDHGmAULFpjmzZub0tJSq2b8+PGmY8eO1v177rnHJCcne/USHx9vHnnkkRr3cj4lJSVGkikpKalRfU1kZmaaxAlvmMGvbTSDX9toEie8YTIzM+tt/QAANAQ1fQ316RGbG2+8Uenp6fruu+8kSdu2bdOXX36pAQMGSJL27t2r/Px8JSYmWo8JCQlRfHy8MjIyJEkZGRkKDQ1V7969rZrExET5+/srMzPTqunTp48cDodVk5SUpLy8PP38889Wzenbqayp3E5NejlTaWmp3G631w0AAFw8jXy58QkTJsjtdqtTp04KCAhQRUWFnn/+eQ0bNkySlJ+fL0kKDw/3elx4eLg1lp+fr7CwMK/xRo0aqUWLFl410dHRVdZROda8eXPl5+efdzvn6+VM06dP17Rp02rwmwAAAPXBp0ds3n33Xb311lt6++239c0332jp0qX6xz/+oaVLl/qyrXozceJElZSUWLcDBw74uiUAAGzNp0dsxo0bpwkTJmjIkCGSpJiYGP3444+aPn26hg8froiICElSQUGBWrdubT2uoKBAsbGxkqSIiAgVFhZ6rffUqVMqKiqyHh8REaGCggKvmsr756s5ffx8
vZzJ6XTK6XTW7JcBAAAumE+P2Pz666/y9/duISAgQB6PR5IUHR2tiIgIpaenW+Nut1uZmZlKSEiQJCUkJKi4uFhZWVlWzbp16+TxeBQfH2/VbNiwQeXl5VbN2rVr1bFjRzVv3tyqOX07lTWV26lJLwAAwLd8Gmz+9Kc/6fnnn9eqVau0b98+rVixQrNnz9Zdd90lSfLz89OYMWP03HPP6cMPP9SOHTt0//33KzIyUgMHDpQkde7cWf3799eIESO0adMmffXVVxo1apSGDBmiyMhISdK9994rh8OhlJQU5ebmavny5XrllVeUlpZm9TJ69GitWbNGL730knbt2qWpU6dqy5YtGjVqVI17AQAAPnaJrtKqltvtNqNHjzZt2rQxQUFBpn379uapp57yuizb4/GYSZMmmfDwcON0Ok3fvn1NXl6e13qOHj1qhg4dapo2bWpcLpd58MEHzbFjx7xqtm3bZm6++WbjdDrNlVdeaWbMmFGln3fffddce+21xuFwmK5du5pVq1Z5jdekl3Phcm8AAOqmpq+hfsac9jG/uKjcbrdCQkJUUlIil8tVL+vctGmTnlqxQy2ju0iSju7dqefvilFcXFy9rB8AgIagpq+hfFcUAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDYINAACwDZ8Hm4MHD+qvf/2rWrZsqeDgYMXExGjLli3WuDFGkydPVuvWrRUcHKzExETt3r3bax1FRUUaNmyYXC6XQkNDlZKSouPHj3vVbN++XbfccouCgoIUFRWlmTNnVunlvffeU6dOnRQUFKSYmBitXr3aa7wmvQAAAN/xabD5+eefddNNNykwMFAff/yxdu7cqZdeeknNmze3ambOnKlXX31VixYtUmZmppo0aaKkpCSdPHnSqhk2bJhyc3O1du1arVy5Uhs2bNDIkSOtcbfbrX79+qlt27bKysrSrFmzNHXqVL3++utWzcaNGzV06FClpKRo69atGjhwoAYOHKicnJxa9QIAAHzI+ND48ePNzTfffNZxj8djIiIizKxZs6xlxcXFxul0mnfeeccYY8zOnTuNJLN582ar5uOPPzZ+fn7m4MGDxhhjFixYYJo3b25KS0u9tt2xY0fr/j333GOSk5O9th8fH28eeeSRGvdyPiUlJUaSKSkpqVF9TWRmZprECW+Ywa9tNINf22gSJ7xhMjMz6239AAA0BDV9DfXpEZsPP/xQvXv31n/+538qLCxMPXr00D//+U9rfO/evcrPz1diYqK1LCQkRPHx8crIyJAkZWRkKDQ0VL1797ZqEhMT5e/vr8zMTKumT58+cjgcVk1SUpLy8vL0888/WzWnb6eypnI7NenlTKWlpXK73V43AABw8fg02Pzwww9auHChrrnmGn3yySd67LHH9MQTT2jp0qWSpPz8fElSeHi41+PCw8Otsfz8fIWFhXmNN2rUSC1atPCqqW4dp2/jbDWnj5+vlzNNnz5dISEh1i0qKup8vxIAAHABfBpsPB6PevbsqRdeeEE9evTQyJEjNWLECC1atMiXbdWbiRMnqqSkxLodOHDA1y0BAGBrPg02rVu3VpcuXbyWde7cWfv375ckRURESJIK
Cgq8agoKCqyxiIgIFRYWeo2fOnVKRUVFXjXVreP0bZyt5vTx8/VyJqfTKZfL5XUDAAAXj0+DzU033aS8vDyvZd99953atm0rSYqOjlZERITS09OtcbfbrczMTCUkJEiSEhISVFxcrKysLKtm3bp18ng8io+Pt2o2bNig8vJyq2bt2rXq2LGjdQVWQkKC13Yqayq3U5NeAACAb/k02IwdO1Zff/21XnjhBX3//fd6++239frrrys1NVWS5OfnpzFjxui5557Thx9+qB07duj+++9XZGSkBg4cKOm3Izz9+/fXiBEjtGnTJn311VcaNWqUhgwZosjISEnSvffeK4fDoZSUFOXm5mr58uV65ZVXlJaWZvUyevRorVmzRi+99JJ27dqlqVOnasuWLRo1alSNewEAAD52ia7SOquPPvrIdOvWzTidTtOpUyfz+uuve417PB4zadIkEx4ebpxOp+nbt6/Jy8vzqjl69KgZOnSoadq0qXG5XObBBx80x44d86rZtm2bufnmm43T6TRXXnmlmTFjRpVe3n33XXPttdcah8NhunbtalatWlXrXs6Fy70BAKibmr6G+hljjK/D1e+F2+1WSEiISkpK6u18m02bNumpFTvUMvq3c5WO7t2p5++KUVxcXL2sHwCAhqCmr6E+/0oFAACA+kKwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwAQAAtlGnYNO+fXsdPXq0yvLi4mK1b9/+gpsCAACoizoFm3379qmioqLK8tLSUh08ePCCmwIAAKiLRrUp/vDDD62fP/nkE4WEhFj3KyoqlJ6ernbt2tVbcwAAALVRq2AzcOBASZKfn5+GDx/uNRYYGKh27drppZdeqrfmAAAAaqNWwcbj8UiSoqOjtXnzZrVq1eqiNAUAAFAXtQo2lfbu3VvffQAAAFywOgUbSUpPT1d6eroKCwutIzmV3nzzzQtuDAAAoLbqFGymTZumZ555Rr1791br1q3l5+dX330BAADUWp2CzaJFi7RkyRLdd9999d0PAABAndXpc2zKysp044031ncvAAAAF6ROwebhhx/W22+/Xd+9AAAAXJA6vRV18uRJvf766/r000913XXXKTAw0Gt89uzZ9dIcAABAbdQp2Gzfvl2xsbGSpJycHK8xTiQGAAC+Uqdg89lnn9V3HwAAABesTufYAAAANER1OmJz++23n/Mtp3Xr1tW5IQAAgLqqU7CpPL+mUnl5ubKzs5WTk1PlyzEBAAAulToFmzlz5lS7fOrUqTp+/PgFNQQAAFBX9XqOzV//+le+JwoAAPhMvQabjIwMBQUF1ecqAQAAaqxOb0UNGjTI674xRocPH9aWLVs0adKkemkMAACgtuoUbEJCQrzu+/v7q2PHjnrmmWfUr1+/emkMAACgtuoUbBYvXlzffQAAAFywOgWbSllZWfr2228lSV27dlWPHj3qpSkAAIC6qFOwKSws1JAhQ7R+/XqFhoZKkoqLi3X77bdr2bJluuKKK+qzRwAAgBqp01VRjz/+uI4dO6bc3FwVFRWpqKhIOTk5crvdeuKJJ+q7RwAAgBqp0xGbNWvW6NNPP1Xnzp2tZV26dNH8+fM5eRgAAPhMnY7YeDweBQYGVlkeGBgoj8dzwU0BAADURZ2CzR/+8AeNHj1ahw4dspYdPHhQY8eOVd++feutOQAAgNqoU7CZN2+e3G632rVrp6uvvlpXX321oqOj5Xa7NXfu3PruEQAAoEbqdI5NVFSUvvnmG3366afatWuXJKlz585KTEys1+YAAABqo1ZHbNatW6cuXbrI7XbLz89P//Ef/6HHH39cjz/+uK6//np17dpVX3zxxcXqFQAA4JxqFWxefvlljRgxQi6Xq8pYSEiIHnnkEc2ePbvemgMAAKiN
WgWbbdu2qX///mcd79evn7Kysi64KQAAgLqoVbApKCio9jLvSo0aNdKRI0cuuCkAAIC6qFWwufLKK5WTk3PW8e3bt6t169YX3BQAAEBd1CrY/PGPf9SkSZN08uTJKmMnTpzQlClTdMcdd9SpkRkzZsjPz09jxoyxlp08eVKpqalq2bKlmjZtqrvvvlsFBQVej9u/f7+Sk5PVuHFjhYWFady4cTp16pRXzfr169WzZ085nU516NBBS5YsqbL9+fPnq127dgoKClJ8fLw2bdrkNV6TXgAAgG/VKtg8/fTTKioq0rXXXquZM2fqgw8+0AcffKAXX3xRHTt2VFFRkZ566qlaN7F582a99tpruu6667yWjx07Vh999JHee+89ff755zp06JAGDRpkjVdUVCg5OVllZWXauHGjli5dqiVLlmjy5MlWzd69e5WcnKzbb79d2dnZGjNmjB5++GF98sknVs3y5cuVlpamKVOm6JtvvlH37t2VlJSkwsLCGvcCAAAaAFNL+/btMwMGDDD+/v7Gz8/P+Pn5GX9/fzNgwADzww8/1HZ15tixY+aaa64xa9euNbfeeqsZPXq0McaY4uJiExgYaN577z2r9ttvvzWSTEZGhjHGmNWrVxt/f3+Tn59v1SxcuNC4XC5TWlpqjDHmySefNF27dvXa5uDBg01SUpJ1Py4uzqSmplr3KyoqTGRkpJk+fXqNe6mJkpISI8mUlJTU+DHnk5mZaRInvGEGv7bRDH5to0mc8IbJzMyst/UDANAQ1PQ1tNafPNy2bVutXr1aP/30kzIzM/X111/rp59+0urVqxUdHV3rYJWamqrk5OQqH+6XlZWl8vJyr+WdOnVSmzZtlJGRIUnKyMhQTEyMwsPDrZqkpCS53W7l5uZaNWeuOykpyVpHWVmZsrKyvGr8/f2VmJho1dSkl+qUlpbK7XZ73QAAwMVTp08elqTmzZvr+uuvv6CNL1u2TN988402b95cZSw/P18Oh0OhoaFey8PDw5Wfn2/VnB5qKscrx85V43a7deLECf3888+qqKiotqbyU5Vr0kt1pk+frmnTpp11HAAA1K86fVdUfThw4IBGjx6tt956S0FBQb5q46KaOHGiSkpKrNuBAwd83RIAALbms2CTlZWlwsJC9ezZU40aNVKjRo30+eef69VXX1WjRo0UHh6usrIyFRcXez2uoKBAERERkqSIiIgqVyZV3j9fjcvlUnBwsFq1aqWAgIBqa05fx/l6qY7T6ZTL5fK6AQCAi8dnwaZv377asWOHsrOzrVvv3r01bNgw6+fAwEClp6dbj8nLy9P+/fuVkJAgSUpISNCOHTu8rl5au3atXC6XunTpYtWcvo7Kmsp1OBwO9erVy6vG4/EoPT3dqunVq9d5ewEAAL5X53NsLlSzZs3UrVs3r2VNmjRRy5YtreUpKSlKS0tTixYt5HK59PjjjyshIUE33HCDpN++wqFLly667777NHPmTOXn5+vpp59WamqqnE6nJOnRRx/VvHnz9OSTT+qhhx7SunXr9O6772rVqlXWdtPS0jR8+HD17t1bcXFxevnll/XLL7/owQcflPTb92CdrxcAAOB7Pgs2NTFnzhz5+/vr7rvvVmlpqZKSkrRgwQJrPCAgQCtXrtRjjz2mhIQENWnSRMOHD9czzzxj1URHR2vVqlUaO3asXnnlFV111VV64403lJSUZNUMHjxYR44c0eTJk5Wfn6/Y2FitWbPG64Ti8/UCAAB8z88YY3zdxO+F2+1WSEiISkpK6u18m02bNumpFTvUMvq3t96O7t2p5++KUVxcXL2sHwCAhqCmr6EN+ogNas9Tccr6DJ9KsbGxcjgcPuoIAIBLh2BjM8cK9mvOjycV/p1HklRy6AfNSxVHcAAAvwsEGxtqFt7WemsKAIDfE59d7g0AAFDfCDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYA
AMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2CDYAAMA2fBpspk+fruuvv17NmjVTWFiYBg4cqLy8PK+akydPKjU1VS1btlTTpk119913q6CgwKtm//79Sk5OVuPGjRUWFqZx48bp1KlTXjXr169Xz5495XQ61aFDBy1ZsqRKP/Pnz1e7du0UFBSk+Ph4bdq0qda9AAAA3/FpsPn888+Vmpqqr7/+WmvXrlV5ebn69eunX375xaoZO3asPvroI7333nv6/PPPdejQIQ0aNMgar6ioUHJyssrKyrRx40YtXbpUS5Ys0eTJk62avXv3Kjk5Wbfffruys7M1ZswYPfzww/rkk0+smuXLlystLU1TpkzRN998o+7duyspKUmFhYU17gUAAPiWnzHG+LqJSkeOHFFYWJg+//xz9enTRyUlJbriiiv09ttv6y9/+YskadeuXercubMyMjJ0ww036OOPP9Ydd9yhQ4cOKTw8XJK0aNEijR8/XkeOHJHD4dD48eO1atUq5eTkWNsaMmSIiouLtWbNGklSfHy8rr/+es2bN0+S5PF4FBUVpccff1wTJkyoUS/n43a7FRISopKSErlcrnr5nW3atElPrdihltFdJEk/fLVSgSHhiup2vSTp6N6dev6uGMXFxdXL9gAA8IWavoY2qHNsSkpKJEktWrSQJGVlZam8vFyJiYlWTadOndSmTRtlZGRIkjIyMhQTE2OFGklKSkqS2+1Wbm6uVXP6OiprKtdRVlamrKwsrxp/f38lJiZaNTXp5UylpaVyu91eNwAAcPE0mGDj8Xg0ZswY3XTTTerWrZskKT8/Xw6HQ6GhoV614eHhys/Pt2pODzWV45Vj56pxu906ceKEfvrpJ1VUVFRbc/o6ztfLmaZPn66QkBDrFhUVVcPfBgAAqIsGE2xSU1OVk5OjZcuW+bqVejNx4kSVlJRYtwMHDvi6JQAAbK2RrxuQpFGjRmnlypXasGGDrrrqKmt5RESEysrKVFxc7HWkpKCgQBEREVbNmVcvVV6pdHrNmVcvFRQUyOVyKTg4WAEBAQoICKi25vR1nK+XMzmdTjmdzlr8JgAAwIXw6REbY4xGjRqlFStWaN26dYqOjvYa79WrlwIDA5Wenm4ty8vL0/79+5WQkCBJSkhI0I4dO7yuXlq7dq1cLpe6dOli1Zy+jsqaynU4HA716tXLq8bj8Sg9Pd2qqUkvAADAt3x6xCY1NVVvv/22PvjgAzVr1sw6VyUkJETBwcEKCQlRSkqK0tLS1KJFC7lcLj3++ONKSEiwrkLq16+funTpovvuu08zZ85Ufn6+nn76aaWmplpHSx599FHNmzdPTz75pB566CGtW7dO7777rlatWmX1kpaWpuHDh6t3796Ki4vTyy+/rF9++UUPPvig1dP5egEAAL7l02CzcOFCSdJtt93mtXzx4sV64IEHJElz5syRv7+/7r77bpWWliopKUkLFiywagMCArRy5Uo99thjSkhIUJMmTTR8+HA988wzVk10dLRWrVqlsWPH6pVXXtFVV12lN954Q0lJSVbN4MGDdeTIEU2ePFn5+fmKjY3VmjVrvE4oPl8vAADAtxrU59jYHZ9jAwBA3VyWn2MDAABwIQg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2
AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANgg2AADANhr5ugFcXJ6KU8rNza2yPDY2Vg6HwwcdAQBw8RBsbO5YwX7N+fGkwr/zWMtKDv2gealSXFycDzsDAKD+EWx+B5qFt1XL6C6+bgMAgIuOc2wAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtEGwAAIBtNPJ1A7j0PBWnlJub67UsNjZWDofDRx0BAFA/CDa/Q8cK9mvOjycV/p1HklRy6AfNS5Xi4uJ83BkAABeGYPM71Sy8rVpGd/F1GwAA1CvOsQEAALZBsAEAALZBsAEAALZBsAEAALbBycPg8m8AgG0QbMDl3wAA2yDYQBKXfwMA7IFzbAAAgG1wxAZVcM4NAOByRbCppfnz52vWrFnKz89X9+7dNXfuXNudi3LmOTc/H9it1L656tq1q1cdYQcA0NAQbGph+fLlSktL06JFixQfH6+XX35ZSUlJysvLU1hYmK/bq1enn3NTcugHzVmTYwUdqfqwQ9ABAPgawaYWZs+erREjRujBBx+UJC1atEirVq3Sm2++qQkTJvi4u4vrzJOLzww7Zwad8vJySVJgYGC192tSU5N1EKYAAKcj2NRQWVmZsrKyNHHiRGuZv7+/EhMTlZGRUe1jSktLVVpaat0vKSmRJLnd7nrr6/jx4yra961OlZ74bRuH96nRsRI5GvlXe78mNTVeR9Pm1naPFR7QtDe+VWjrLEnS0X3fKsDZVKGto6q9X5Oa893/pahAY/9yuzp16lRvv08AQP3o3bt3va6v8rXTGHPOOoJNDf3000+qqKhQeHi41/Lw8HDt2rWr2sdMnz5d06ZNq7I8Kiqqmur6tfM892tSU5d1/FjL+3V5zOn3Uz/7/6pZIwDAro4dO6aQkJCzjhNsLqKJEycqLS3Nuu/xeFRUVKSWLVvKz8+vXrbhdrsVFRWlAwcOyOVy1cs6fc1uc7LbfCT7zclu85GY0+XAbvORLu6cjDE6duyYIiMjz1lHsKmhVq1aKSAgQAUFBV7LCwoKFBERUe1jnE6nnE6n17LQ0NCL0p/L5bLNH0Ylu83JbvOR7Dcnu81HYk6XA7vNR7p4czrXkZpKfEBfDTkcDvXq1Uvp6enWMo/Ho/T0dCUkJPiwMwAAUIkjNrWQlpam4cOHq3fv3oqLi9PLL7+sX375xbpKCgAA+BbBphYGDx6sI0eOaPLkycrPz1dsbKzWrFlT5YTiS8npdGrKlClV3vK6nNltTnabj2S/OdltPhJzuhzYbT5Sw5iTnznfdVMAAACXCc6xAQAAtkGwAQAAtkGwAQAAtkGwAQAAtkGwuczNnz9f7dq1U1BQkOLj47Vp0yZft1St6dOn6/rrr1ezZs0UFhamgQMHKi8vz6vmtttuk5+fn9ft0Ucf9arZv3+/kpOT1bhxY4WFhWncuHE6derUpZyKJGnq1KlVej39O6tOnjyp1NRUtWzZUk2bNtXdd99d5cMdG8pcKrVr167KnPz8/JSamiqp4e+fDRs26E9/+pMiIyPl5+en999/32vcGKPJkyerdevWCg4OVmJionbv3u1VU1RUpGHDhsnlcik0NFQpKSk6fvy4V8327dt1yy23KCgoSFFRUZo5c6ZP5lReXq7x48crJiZGTZo0UWRkpO6//34dOnTIax3V7dcZM2Y0yDlJ0gMPPFCl3/79+3vVNKT9dL75VPc35efnp1mzZlk1DWkf1eS5ur6e39avX6+ePXvK6XSqQ4cOWrJkSf1MwuCytWzZMuNwOMybb75pcnNzzYgRI0xoaKgpKCjwdWtVJCUlmcWLF5ucnByTnZ1t/vjHP5o2bdqY48ePWzW33nqrGTFihDl8+LB1KykpscZP
nTplunXrZhITE83WrVvN6tWrTatWrczEiRMv+XymTJliunbt6tXrkSNHrPFHH33UREVFmfT0dLNlyxZzww03mBtvvLFBzqVSYWGh13zWrl1rJJnPPvvMGNPw98/q1avNU089Zf79738bSWbFihVe4zNmzDAhISHm/fffN9u2bTN//vOfTXR0tDlx4oRV079/f9O9e3fz9ddfmy+++MJ06NDBDB061BovKSkx4eHhZtiwYSYnJ8e88847Jjg42Lz22muXfE7FxcUmMTHRLF++3OzatctkZGSYuLg406tXL691tG3b1jzzzDNe++30v7uGNCdjjBk+fLjp37+/V79FRUVeNQ1pP51vPqfP4/Dhw+bNN980fn5+Zs+ePVZNQ9pHNXmuro/ntx9++ME0btzYpKWlmZ07d5q5c+eagIAAs2bNmgueA8HmMhYXF2dSU1Ot+xUVFSYyMtJMnz7dh13VTGFhoZFkPv/8c2vZrbfeakaPHn3Wx6xevdr4+/ub/Px8a9nChQuNy+UypaWlF7PdKqZMmWK6d+9e7VhxcbEJDAw07733nrXs22+/NZJMRkaGMaZhzeVsRo8eba6++mrj8XiMMZfX/jnzBcbj8ZiIiAgza9Ysa1lxcbFxOp3mnXfeMcYYs3PnTiPJbN682ar5+OOPjZ+fnzl48KAxxpgFCxaY5s2be81n/PjxpmPHjhd5RlXnVJ1NmzYZSebHH3+0lrVt29bMmTPnrI9paHMaPny4ufPOO8/6mIa8n2qyj+68807zhz/8wWtZQ95HZz5X19fz25NPPmm6du3qta3BgwebpKSkC+6Zt6IuU2VlZcrKylJiYqK1zN/fX4mJicrIyPBhZzVTUlIiSWrRooXX8rfeekutWrVSt27dNHHiRP3666/WWEZGhmJiYrw+EDEpKUlut1u5ubmXpvHT7N69W5GRkWrfvr2GDRum/fv3S5KysrJUXl7utW86deqkNm3aWPumoc3lTGVlZfrXv/6lhx56yOsLWy+n/XO6vXv3Kj8/32ufhISEKD4+3mufhIaGqnfv3lZNYmKi/P39lZmZadX06dNHDofDqklKSlJeXp5+/vnnSzSbsyspKZGfn1+V76SbMWOGWrZsqR49emjWrFlebwk0xDmtX79eYWFh6tixox577DEdPXrUGruc91NBQYFWrVqllJSUKmMNdR+d+VxdX89vGRkZXuuorKmP1y8+efgy9dNPP6mioqLKpx6Hh4dr165dPuqqZjwej8aMGaObbrpJ3bp1s5bfe++9atu2rSIjI7V9+3aNHz9eeXl5+ve//y1Jys/Pr3a+lWOXUnx8vJYsWaKOHTvq8OHDmjZtmm655Rbl5OQoPz9fDoejyotLeHi41WdDmkt13n//fRUXF+uBBx6wll1O++dMlduvrr/T90lYWJjXeKNGjdSiRQuvmujo6CrrqBxr3rz5Rem/Jk6ePKnx48dr6NChXl8++MQTT6hnz55q0aKFNm7cqIkTJ+rw4cOaPXu2pIY3p/79+2vQoEGKjo7Wnj179Pe//10DBgxQRkaGAgICLuv9tHTpUjVr1kyDBg3yWt5Q91F1z9X19fx2thq3260TJ04oODi4zn0TbHDJpaamKicnR19++aXX8pEjR1o/x8TEqHXr1urbt6/27Nmjq6+++lK3eU4DBgywfr7uuusUHx+vtm3b6t13372gP8iG4n/+5380YMAARUZGWssup/3ze1NeXq577rlHxhgtXLjQaywtLc36+brrrpPD4dAjjzyi6dOnN8iP8h8yZIj1c0xMjK677jpdffXVWr9+vfr27evDzi7cm2++qWHDhikoKMhreUPdR2d7rm7oeCvqMtWqVSsFBARUORO9oKBAERERPurq/EaNGqWVK1fqs88+01VXXXXO2vj4eEnS999/L0mKiIiodr6VY74UGhqqa6+9Vt9//70iIiJUVlam4uJir5rT901DnsuPP/6oTz/9VA8//PA56y6n/VO5/XP9vURE
RKiwsNBr/NSpUyoqKmrQ+60y1Pz4449au3at19Ga6sTHx+vUqVPat2+fpIY5p9O1b99erVq18vp3djnupy+++EJ5eXnn/buSGsY+OttzdX09v52txuVyXfB/Dgk2lymHw6FevXopPT3dWubxeJSenq6EhAQfdlY9Y4xGjRqlFStWaN26dVUOq1YnOztbktS6dWtJUkJCgnbs2OH1pFb5RN6lS5eL0ndNHT9+XHv27FHr1q3Vq1cvBQYGeu2bvLw87d+/39o3DXkuixcvVlhYmJKTk89Zdzntn+joaEVERHjtE7fbrczMTK99UlxcrKysLKtm3bp18ng8VohLSEjQhg0bVF5ebtWsXbtWHTt29MnbG5WhZvfu3fr000/VsmXL8z4mOztb/v7+1ts5DW1OZ/q///s/HT161Ovf2eW2n6TfjoL26tVL3bt3P2+tL/fR+Z6r6+v5LSEhwWsdlTX18vp1wacfw2eWLVtmnE6nWbJkidm5c6cZOXKkCQ0N9ToTvaF47LHHTEhIiFm/fr3XJY2//vqrMcaY77//3jzzzDNmy5YtZu/eveaDDz4w7du3N3369LHWUXkJYb9+/Ux2drZZs2aNueKKK3xyifTf/vY3s379erN3717z1VdfmcTERNOqVStTWFhojPntcsg2bdqYdevWmS1btpiEhASTkJDQIOdyuoqKCtOmTRszfvx4r+WXw/45duyY2bp1q9m6dauRZGbPnm22bt1qXSE0Y8YMExoaaj744AOzfft2c+edd1Z7uXePHj1MZmam+fLLL80111zjdRlxcXGxCQ8PN/fdd5/Jyckxy5YtM40bN75ol0afa05lZWXmz3/+s7nqqqtMdna2199V5ZUnGzduNHPmzDHZ2dlmz5495l//+pe54oorzP33398g53Ts2DHz3//93yYjI8Ps3bvXfPrpp6Znz57mmmuuMSdPnrTW0ZD20/n+3Rnz2+XajRs3NgsXLqzy+Ia2j873XG1M/Ty/VV7uPW7cOPPtt9+a+fPnc7k3fjN37lzTpk0b43A4TFxcnPn666993VK1JFV7W7x4sTHGmP3795s+ffqYFi1aGKfTaTp06GDGjRvn9Tkpxhizb98+M2DAABMcHGxatWpl/va3v5ny8vJLPp/Bgweb1q1bG4fDYa688kozePBg8/3331vjJ06cMP/1X/9lmjdvbho3bmzuuusuc/jwYa91NJS5nO6TTz4xkkxeXp7X8sth/3z22WfV/hsbPny4Mea3S74nTZpkwsPDjdPpNH379q0yz6NHj5qhQ4eapk2bGpfLZR588EFz7Ngxr5pt27aZm2++2TidTnPllVeaGTNm+GROe/fuPevfVeVnD2VlZZn4+HgTEhJigoKCTOfOnc0LL7zgFRIa0px+/fVX069fP3PFFVeYwMBA07ZtWzNixIgq/1lrSPvpfP/ujDHmtddeM8HBwaa4uLjK4xvaPjrfc7Ux9ff89tlnn5nY2FjjcDhM+/btvbZxIfz+30QAAAAue5xjAwAAbINgAwAAbINgAwAAbINgAwAAbINgAwAAbINgAwAAbINgAwAAbINgA+B3bd++ffLz87O+IgLA5Y0P6APwu1ZRUaEjR46oVatWatSoka/bAXCBCDYAfrfKysrkcDh83QaAesRbUQBs47bbbtOoUaM0atQohYSEqFWrVpo0aZIq///Wrl07Pfvss7r//vvlcrk0cuTIat+Kys3N1R133CGXy6VmzZrplltu0Z49e6zxN954Q507d1ZQUJA6deqkBQsWXOqpAjgLjrsCsJWlS5cqJSVFmzZt0pYtWzRy5Ei1adNGI0aMkCT94x//0OTJkzVlypRqH3/w4EH16dNHt912m9atWyeXy6WvvvpKp06dkiS99dZbmjx5subNm6cePXpo69atGjFihJo0aaLhw4dfsnkCqB5vRQGwjdtuu02FhYXKzc2Vn5+fJGnChAn68MMPtXPnTrVr1049evTQihUrrMfs27dP0dHR
2rp1q2JjY/X3v/9dy5YtU15engIDA6tso0OHDnr22Wc1dOhQa9lzzz2n1atXa+PGjRd/kgDOibeiANjKDTfcYIUaSUpISNDu3btVUVEhSerdu/c5H5+dna1bbrml2lDzyy+/aM+ePUpJSVHTpk2t23PPPef1VhUA3+GtKAC/K02aNDnneHBw8FnHjh8/Lkn65z//qfj4eK+xgICAC28OwAUj2ACwlczMTK/7X3/9ta655poaB4/rrrtOS5cuVXl5eZWjNuHh4YqMjNQPP/ygYcOG1VvPAOoPb0UBsJX9+/crLS1NeXl5eueddzR37lyNHj26xo8fNWqU3G63hgwZoi1btmj37t363//9X+Xl5UmSpk2bpunTp+vVV1/Vd999px07dmjx4sWaPXv2xZoSgFrgiA0AW7n//vt14sQJxcXFKSAgQKNHj9bIkSNr/PiWLVtq3bp1GjdunG699VYFBAQoNjZWN910kyTp4YcfVuPGjTVr1iyNGzdOTZo0UUxMjMaMGXORZgSgNrgqCoBt3HbbbYqNjdXLL7/s61YA+AhvRQEAANsg2AAAANvgrSgAAGAbHLEBAAC2QbABAAC2QbABAAC2QbABAAC2QbABAAC2QbABAAC2QbABAAC2QbABAAC2QbABAAC28f8D+GWREkjyWscAAAAASUVORK5CYII=\n"
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "# Build the log1p target from the price column rather than from y_train_df itself, so re-running this cell does not stack log1p on an already transformed series.\n",
+ "y_train_df = np.log1p(mercari_df['price'])\n",
+ "sns.histplot(y_train_df, bins=50)\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 449
+ },
+ "id": "A1Ytthx4B5w_",
+ "outputId": "db662fca-bd7c-4d0a-8ff8-9f9f0f8bbe69"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAGwCAYAAABrUCsdAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAPqhJREFUeJzt3XtYVnW+///XjQiYCYjGaUJlyvGslChhapps0KyJyRw1ppgindxgIrO1LEXTirI0TyRjjYdmZGvuGRlTQwlTm8QDKKOSkjWW7vSG5qtwC5OAcP/+mM36eY+EpwU36PNxXeu6XOvzXmu91900vlpHi91utwsAAAA3zMXZDQAAANwsCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmMTV2Q3cSmpqanT69Gm1adNGFovF2e0AAICrYLfbdf78eQUGBsrFpf5zUgSrRnT69GkFBQU5uw0AAHAdTp06pTvvvLPeGoJVI2rTpo2kf/2D8fT0dHI3AADgathsNgUFBRl/j9eHYNWIai//eXp6EqwAAGhmruY2Hm5eBwAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJO4OrsBAA2vR+8QnTlzpt6agIAAFRzKb5yGAOAmRbACbgFnzpxR5GsZ9dZsezm6UXoBgJsZlwIBAABM4tRgtWvXLj3yyCMKDAyUxWJRRkbGZTVHjx7Vz3/+c3l5eal169bq16+fTp48aYxfuHBB8fHxateunW6//XaNGjVKRUVFDts4efKkRo4cqdtuu02+vr6aOnWqLl686FCzY8cO3XvvvXJ3d9fdd9+tVatWXdZLamqqOnXqJA8PD4WFhWnfvn2m/A4AAODm4NRgVV5erj59+ig1NbXO8a+//loDBw5U165dtWPHDh06dEgzZ86Uh4eHUTNlyhR99NFHWr9+vXbu3KnTp0/rscceM8arq6s1cuRIVVZWavfu3Vq9erVWrVql5ORko+bEiRMaOXKkhg4dqvz8fCUmJurZZ5/V1q1bjZp169YpKSlJs2bN0oEDB9SnTx9FRUWpuLi4AX4ZAADQHFnsdrvd2U1IksVi0YYNGxQdHW0sGzt2rFq2bKk//OEPda5TWlqqO+64Q+np6Xr88cclSceOHVO3bt2Uk5Oj++67Tx9//LEefvhhnT59Wn5+fpKktLQ0vfDCC/r+++/l5uamF154QZs3b9aRI0cc9l1SUqLMzExJUlhYmPr166elS5dKkmpqahQUFKRJkybpxRdfrLO/iooKVVRUGPM2m01BQUEqLS2Vp6fn9f9YwDXyucPvqu6xOvt9Ub01AHArstls8vLyuqq/v5vsPVY1NTXavHmzfvaznykqKkq+vr4KCwtzuFyYl5enqqoqRUREGMu6du2qDh06KCcnR5KUk5OjXr16GaFKkqKiomSz2VRQUGDUXLqN2prabVRWViovL8+hxsXFRREREUZNXVJSUuTl5WVMQUFB1/+DAACAJq/JBqvi4mKVlZXpjTfe0PDhw7Vt2zb94he/0GOPPaadO3dKkqxWq9zc3OTt7e2wrp+fn6xWq1FzaaiqHa8dq6/GZrPphx9+0D/+8Q9VV1fXWVO7jbpMnz5dpaWlxnTq1Klr/yEAAECz0WRft1BTUyNJevTRRzVlyhRJUkhIiHbv3q20tDQ98MADzmzvqri7u8vd3d3ZbQAAgEbSZM9YtW/fXq6ururevbvD8m7duhlPBfr7+6uyslIlJSUONUVFRfL39zdq/v0pwdr5K9V4enqqVatWat++vVq0aFFnTe02AAAAmmywcnNzU79+/VRYWOiw/Msvv1THjh0lSX379lXLli2VnZ1tjBcWFurkyZMKDw+XJIWHh+vw4cMOT+9lZWXJ09PTCG3h4eEO26it
qd2Gm5ub+vbt61BTU1Oj7OxsowYAAMCplwLLysr01VdfGfMnTpxQfn6+fHx81KFDB02dOlVjxozR4MGDNXToUGVmZuqjjz7Sjh07JEleXl6Ki4tTUlKSfHx85OnpqUmTJik8PFz33XefJCkyMlLdu3fXk08+qXnz5slqtWrGjBmKj483LtM999xzWrp0qaZNm6ZnnnlG27dv14cffqjNmzcbvSUlJSk2NlahoaHq37+/Fi5cqPLycj399NON94MBAIAmzanBKjc3V0OHDjXmk5KSJEmxsbFatWqVfvGLXygtLU0pKSl6/vnn1aVLF/3pT3/SwIEDjXXeeecdubi4aNSoUaqoqFBUVJTeffddY7xFixbatGmTJk6cqPDwcLVu3VqxsbGaM2eOURMcHKzNmzdrypQpWrRoke688069//77ioqKMmrGjBmj77//XsnJybJarQoJCVFmZuZlN7QDAIBbV5N5j9Wt4FregwGYifdYAcD1uyneYwUAANDcEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCRODVa7du3SI488osDAQFksFmVkZPxo7XPPPSeLxaKFCxc6LD979qxiYmLk6ekpb29vxcXFqayszKHm0KFDGjRokDw8PBQUFKR58+Zdtv3169era9eu8vDwUK9evbRlyxaHcbvdruTkZAUEBKhVq1aKiIjQ8ePHr/vYAQDAzcepwaq8vFx9+vRRampqvXUbNmzQnj17FBgYeNlYTEyMCgoKlJWVpU2bNmnXrl2aMGGCMW6z2RQZGamOHTsqLy9Pb731lmbPnq3ly5cbNbt379a4ceMUFxengwcPKjo6WtHR0Tpy5IhRM2/ePC1evFhpaWnau3evWrduraioKF24cMGEXwIAANwMLHa73e7sJiTJYrFow4YNio6Odlj+3XffKSwsTFu3btXIkSOVmJioxMRESdLRo0fVvXt37d+/X6GhoZKkzMxMPfTQQ/rf//1fBQYGatmyZXr55ZdltVrl5uYmSXrxxReVkZGhY8eOSZLGjBmj8vJybdq0ydjvfffdp5CQEKWlpclutyswMFC//e1v9V//9V+SpNLSUvn5+WnVqlUaO3bsVR2jzWaTl5eXSktL5enpeSM/F3BNfO7wU+RrGfXWbHs5Wme/L2qchgCgGbmWv7+b9D1WNTU1evLJJzV16lT16NHjsvGcnBx5e3sboUqSIiIi5OLior179xo1gwcPNkKVJEVFRamwsFDnzp0zaiIiIhy2HRUVpZycHEnSiRMnZLVaHWq8vLwUFhZm1NSloqJCNpvNYQIAADevJh2s3nzzTbm6uur555+vc9xqtcrX19dhmaurq3x8fGS1Wo0aPz8/h5ra+SvVXDp+6Xp11dQlJSVFXl5exhQUFFTv8QIAgOatyQarvLw8LVq0SKtWrZLFYnF2O9dl+vTpKi0tNaZTp045uyUAANCAmmyw+uyzz1RcXKwOHTrI1dVVrq6u+vbbb/Xb3/5WnTp1kiT5+/uruLjYYb2LFy/q7Nmz8vf3N2qKihzvG6mdv1LNpeOXrldXTV3c3d3l6enpMAEAgJtXkw1WTz75pA4dOqT8/HxjCgwM1NSpU7V161ZJUnh4uEpKSpSXl2est337dtXU1CgsLMyo2bVrl6qqqoyarKwsdenSRW3btjVqsrOzHfaflZWl8PBwSVJwcLD8/f0damw2m/bu3WvUAAAAuDpz52VlZfrqq6+M+RMnTig/P18+Pj7q0KGD2rVr51DfsmVL+fv7q0uXLpKkbt26
afjw4Ro/frzS0tJUVVWlhIQEjR071ng1wxNPPKFXXnlFcXFxeuGFF3TkyBEtWrRI77zzjrHdyZMn64EHHtD8+fM1cuRIrV27Vrm5ucYrGSwWixITE/Xqq6+qc+fOCg4O1syZMxUYGHjZU4wAAODW5dRglZubq6FDhxrzSUlJkqTY2FitWrXqqraxZs0aJSQkaNiwYXJxcdGoUaO0ePFiY9zLy0vbtm1TfHy8+vbtq/bt2ys5OdnhXVcDBgxQenq6ZsyYoZdeekmdO3dWRkaGevbsadRMmzZN5eXlmjBhgkpKSjRw4EBlZmbKw8PjBn8FAABws2gy77G6FfAeKzgL77ECgOt307zHCgAAoDkhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASVyd3QAA1KVH7xCdOXOm3pqAgAAVHMpvnIYA4CoQrAA0SWfOnFHkaxn11mx7ObpRegGAq8WlQAAAAJMQrAAAAExCsAIAADAJwQoAAMAk3LwOwFQ8zQfgVkawAmAqnuYDcCvjUiAAAIBJCFYAAAAmIVgBAACYhGAFAABgEqcGq127dumRRx5RYGCgLBaLMjIyjLGqqiq98MIL6tWrl1q3bq3AwEA99dRTOn36tMM2zp49q5iYGHl6esrb21txcXEqKytzqDl06JAGDRokDw8PBQUFad68eZf1sn79enXt2lUeHh7q1auXtmzZ4jBut9uVnJysgIAAtWrVShERETp+/Lh5PwYAAGj2nBqsysvL1adPH6Wmpl429s9//lMHDhzQzJkzdeDAAf35z39WYWGhfv7znzvUxcTEqKCgQFlZWdq0aZN27dqlCRMmGOM2m02RkZHq2LGj8vLy9NZbb2n27Nlavny5UbN7926NGzdOcXFxOnjwoKKjoxUdHa0jR44YNfPmzdPixYuVlpamvXv3qnXr1oqKitKFCxca4JcBAADNkVNftzBixAiNGDGizjEvLy9lZWU5LFu6dKn69++vkydPqkOHDjp69KgyMzO1f/9+hYaGSpKWLFmihx56SG+//bYCAwO1Zs0aVVZWasWKFXJzc1OPHj2Un5+vBQsWGAFs0aJFGj58uKZOnSpJmjt3rrKysrR06VKlpaXJbrdr4cKFmjFjhh599FFJ0gcffCA/Pz9lZGRo7NixDfUTAQCAZqRZ3WNVWloqi8Uib29vSVJOTo68vb2NUCVJERERcnFx0d69e42awYMHy83NzaiJiopSYWGhzp07Z9REREQ47CsqKko5OTmSpBMnTshqtTrUeHl5KSwszKipS0VFhWw2m8MEAABuXs0mWF24cEEvvPCCxo0bJ09PT0mS1WqVr6+vQ52rq6t8fHxktVqNGj8/P4ea2vkr1Vw6ful6ddXUJSUlRV5eXsYUFBR0TccMAACal2YRrKqqqvTLX/5Sdrtdy5Ytc3Y7V2369OkqLS01plOnTjm7JQAA0ICa/CdtakPVt99+q+3btxtnqyTJ399fxcXFDvUXL17U2bNn5e/vb9QUFRU51NTOX6nm0vHaZQEBAQ41ISEhP9q7u7u73N3dr+VwAaexnS+Tzx1+9dbwjT8AqF+TDla1oer48eP69NNP1a5dO4fx8PBwlZSUKC8vT3379pUkbd++XTU1NQoLCzNqXn75ZVVVVally5aSpKysLHXp0kVt27Y1arKzs5WYmGhsOysrS+Hh4ZKk4OBg+fv7Kzs72whSNptNe/fu1cSJExvyJwAajb2mhm/8AcANcuqlwLKyMuXn5ys/P1/Sv24Sz8/P18mTJ1VVVaXHH39cubm5WrNmjaqrq2W1WmW1WlVZWSlJ6tatm4YPH67x48dr3759+vzzz5WQkKCxY8cqMDBQkvTEE0/Izc1NcXFxKigo0Lp167Ro
0SIlJSUZfUyePFmZmZmaP3++jh07ptmzZys3N1cJCQmSJIvFosTERL366qvauHGjDh8+rKeeekqBgYGKjo5u1N8MAAA0XU49Y5Wbm6uhQ4ca87VhJzY2VrNnz9bGjRsl6bLLbZ9++qmGDBkiSVqzZo0SEhI0bNgwubi4aNSoUVq8eLFR6+XlpW3btik+Pl59+/ZV+/btlZyc7PCuqwEDBig9PV0zZszQSy+9pM6dOysjI0M9e/Y0aqZNm6by8nJNmDBBJSUlGjhwoDIzM+Xh4WH2zwIAAJoppwarIUOGyG63/+h4fWO1fHx8lJ6eXm9N79699dlnn9VbM3r0aI0ePfpHxy0Wi+bMmaM5c+ZcsScAjYP7wgA0NU36HisAqA/3hQFoaprF6xYAAACaA4IVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEl43QKAq3Y1742ynT/fSN0AQNNDsAJw1a7mvVHrEx5snGYAoAniUiAAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJjEqcFq165deuSRRxQYGCiLxaKMjAyHcbvdruTkZAUEBKhVq1aKiIjQ8ePHHWrOnj2rmJgYeXp6ytvbW3FxcSorK3OoOXTokAYNGiQPDw8FBQVp3rx5l/Wyfv16de3aVR4eHurVq5e2bNlyzb0AAIBbm1ODVXl5ufr06aPU1NQ6x+fNm6fFixcrLS1Ne/fuVevWrRUVFaULFy4YNTExMSooKFBWVpY2bdqkXbt2acKECca4zWZTZGSkOnbsqLy8PL311luaPXu2li9fbtTs3r1b48aNU1xcnA4ePKjo6GhFR0fryJEj19QLAAC4tbk6c+cjRozQiBEj6hyz2+1auHChZsyYoUcffVSS9MEHH8jPz08ZGRkaO3asjh49qszMTO3fv1+hoaGSpCVLluihhx7S22+/rcDAQK1Zs0aVlZVasWKF3Nzc1KNHD+Xn52vBggVGAFu0aJGGDx+uqVOnSpLmzp2rrKwsLV26VGlpaVfVCwAAQJO9x+rEiROyWq2KiIgwlnl5eSksLEw5OTmSpJycHHl7exuhSpIiIiLk4uKivXv3GjWDBw+Wm5ubURMVFaXCwkKdO3fOqLl0P7U1tfu5ml7qUlFRIZvN5jABkGzny+Rzh1+9k+38eWe3CQDXzKlnrOpjtVolSX5+fg7L/fz8jDGr1SpfX1+HcVdXV/n4+DjUBAcHX7aN2rG2bdvKarVecT9X6qUuKSkpeuWVV658sMAtxl5To8jXMuqtWZ/wYOM0AwAmarJnrG4G06dPV2lpqTGdOnXK2S0BAIAG1GSDlb+/vySpqKjIYXlRUZEx5u/vr+LiYofxixcv6uzZsw41dW3j0n38WM2l41fqpS7u7u7y9PR0mAAAwM2ryQar4OBg+fv7Kzs721hms9m0d+9ehYeHS5LCw8NVUlKivLw8o2b79u2qqalRWFiYUbNr1y5VVVUZNVlZWerSpYvatm1r1Fy6n9qa2v1cTS8AAABODVZlZWXKz89Xfn6+pH/dJJ6fn6+TJ0/KYrEoMTFRr776qjZu3KjDhw/rqaeeUmBgoKKjoyVJ3bp10/DhwzV+/Hjt27dPn3/+uRISEjR27FgFBgZKkp544gm5ubkpLi5OBQUFWrdunRYtWqSkpCSjj8mTJyszM1Pz58/XsWPHNHv2bOXm5iohIUGSrqoXAAAAp968npubq6FDhxrztWEnNjZWq1at0rRp01ReXq4JEyaopKREAwcOVGZmpjw8PIx11qxZo4SEBA0bNkwuLi4aNWqUFi9ebIx7eXlp27Ztio+PV9++fdW+fXslJyc7vOtqwIABSk9P14wZM/TS
Sy+pc+fOysjIUM+ePY2aq+kFAADc2pwarIYMGSK73f6j4xaLRXPmzNGcOXN+tMbHx0fp6en17qd379767LPP6q0ZPXq0Ro8efUO9AACAW1uTvccKAACguSFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJjkuoLVT3/6U/2///f/LlteUlKin/70pzfcFAAAQHN0XcHqm2++UXV19WXLKyoq9N13391wUwAAAM3RNb0gdOPGjcaft27dKi8vL2O+urpa2dnZ6tSpk2nNAQAANCfXFKxqv4tnsVgUGxvrMNayZUt16tRJ8+fPN605AACA5uSaglVNTY0kKTg4WPv371f79u0bpCkAAIDm6Lq+FXjixAmz+wAAAGj2rvsjzNnZ2crOzlZxcbFxJqvWihUrbrgxAACA5ua6gtUrr7yiOXPmKDQ0VAEBAbJYLGb3BQAA0OxcV7BKS0vTqlWr9OSTT5rdDwAAQLN1Xe+xqqys1IABA8zuBQAAoFm7rmD17LPPKj093exeAMB0tvNl8rnDr96pR+8QZ7cJ4CZxXZcCL1y4oOXLl+uTTz5R79691bJlS4fxBQsWmNIcANwoe02NIl/LqLdm28vRjdILgJvfdQWrQ4cOKSQkRJJ05MgRhzFuZAcAALeq6wpWn376qdl9AAAANHvXdY8VAAAALnddZ6yGDh1a7yW/7du3X3dDAAAAzdV1Bava+6tqVVVVKT8/X0eOHLns48wAAAC3iusKVu+8806dy2fPnq2ysrIbaggAAKC5MvUeq1/96ld8JxAAANyyTA1WOTk58vDwMHOTAAAAzcZ1XQp87LHHHObtdrvOnDmj3NxczZw505TGAAAAmpvrClZeXl4O8y4uLurSpYvmzJmjyMhIUxoDAABobq4rWK1cudLsPgAAAJq96wpWtfLy8nT06FFJUo8ePXTPPfeY0hQAAEBzdF3Bqri4WGPHjtWOHTvk7e0tSSopKdHQoUO1du1a3XHHHWb2CAAA0Cxc11OBkyZN0vnz51VQUKCzZ8/q7NmzOnLkiGw2m55//nmzewQAAGgWritYZWZm6t1331W3bt2MZd27d1dqaqo+/vhj05qrrq7WzJkzFRwcrFatWumuu+7S3LlzZbfbjRq73a7k5GQFBASoVatWioiI0PHjxx22c/bsWcXExMjT01Pe3t6Ki4u77EWmhw4d0qBBg+Th4aGgoCDNmzfvsn7Wr1+vrl27ysPDQ7169dKWLVtMO1YAAND8XVewqqmpUcuWLS9b3rJlS9XU1NxwU7XefPNNLVu2TEuXLtXRo0f15ptvat68eVqyZIlRM2/ePC1evFhpaWnau3evWrduraioKF24cMGoiYmJUUFBgbKysrRp0ybt2rVLEyZMMMZtNpsiIyPVsWNH5eXl6a233tLs2bO1fPlyo2b37t0aN26c4uLidPDgQUVHRys6OlpHjhwx7XgBAEDzdl3B6sEHH9TkyZN1+vRpY9l3332nKVOmaNiwYaY1t3v3bj366KMaOXKkOnXqpMcff1yRkZHat2+fpH+drVq4cKFmzJihRx99VL1799YHH3yg06dPKyMjQ5J09OhRZWZm6v3331dYWJgGDhyoJUuWaO3atUb/a9asUWVlpVasWKEePXpo7Nixev7557VgwQKjl0WLFmn48OGaOnWqunXrprlz5+ree+/V0qVLTTteAADQvF1XsFq6dKlsNps6deqku+66S3fddZeCg4Nls9kczibdqAEDBig7O1tffvmlJOlvf/ub/vrXv2rEiBGSpBMnTshqtSoiIsJYx8vLS2FhYcrJyZH0r7fBe3t7KzQ01KiJiIiQi4uL9u7da9QMHjxYbm5uRk1UVJQKCwt17tw5o+bS/dTW1O6nLhUVFbLZbA4TAAC4eV3XU4FBQUE6cOCAPvnkEx07dkyS1K1bt8uCx4168cUXZbPZ1LVrV7Vo0ULV1dV67bXXFBMTI0myWq2SJD8/P4f1/Pz8jDGr1SpfX1+H
cVdXV/n4+DjUBAcHX7aN2rG2bdvKarXWu5+6pKSk6JVXXrnWwwYAAM3UNZ2x2r59u7p37y6bzSaLxaL/+I//0KRJkzRp0iT169dPPXr00GeffWZacx9++KHWrFmj9PR0HThwQKtXr9bbb7+t1atXm7aPhjR9+nSVlpYa06lTp5zdEgAAaEDXdMZq4cKFGj9+vDw9PS8b8/Ly0m9+8xstWLBAgwYNMqW5qVOn6sUXX9TYsWMlSb169dK3336rlJQUxcbGyt/fX5JUVFSkgIAAY72ioiKFhIRIkvz9/VVcXOyw3YsXL+rs2bPG+v7+/ioqKnKoqZ2/Uk3teF3c3d3l7u5+rYcNAACaqWs6Y/W3v/1Nw4cP/9HxyMhI5eXl3XBTtf75z3/KxcWxxRYtWhhPHgYHB8vf31/Z2dnGuM1m0969exUeHi5JCg8PV0lJiUNf27dvV01NjcLCwoyaXbt2qaqqyqjJyspSly5d1LZtW6Pm0v3U1tTuBwAA4JqCVVFRUZ2vWajl6uqq77///oabqvXII4/otdde0+bNm/XNN99ow4YNWrBggX7xi19IkiwWixITE/Xqq69q48aNOnz4sJ566ikFBgYqOjpa0r/u/Ro+fLjGjx+vffv26fPPP1dCQoLGjh2rwMBASdITTzwhNzc3xcXFqaCgQOvWrdOiRYuUlJRk9DJ58mRlZmZq/vz5OnbsmGbPnq3c3FwlJCSYdrwAAKB5u6ZLgT/5yU905MgR3X333XWOHzp0yOGS3I1asmSJZs6cqf/8z/9UcXGxAgMD9Zvf/EbJyclGzbRp01ReXq4JEyaopKREAwcOVGZmpjw8PIyaNWvWKCEhQcOGDZOLi4tGjRqlxYsXG+NeXl7atm2b4uPj1bdvX7Vv317JyckO77oaMGCA0tPTNWPGDL300kvq3LmzMjIy1LNnT9OOFwAANG/XFKweeughzZw5U8OHD3cILpL0ww8/aNasWXr44YdNa65NmzZauHChFi5c+KM1FotFc+bM0Zw5c360xsfHR+np6fXuq3fv3le88X706NEaPXp0vTUAAODWdU3BasaMGfrzn/+sn/3sZ0pISFCXLl0kSceOHVNqaqqqq6v18ssvN0ijAAAATd01BSs/Pz/t3r1bEydO1PTp041v9lksFkVFRSk1NfWydz0BAADcKq75BaEdO3bUli1bdO7cOX311Vey2+3q3Lmz8fQcAADAreq63rwuSW3btlW/fv3M7AUAAKBZu65vBQIAAOByBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABM4ursBgDA2Wzny+Rzh1+9NQEBASo4lN84DQFotghWAG559poaRb6WUW/NtpejG6UXAM1bk78U+N133+lXv/qV2rVrp1atWqlXr17Kzc01xu12u5KTkxUQEKBWrVopIiJCx48fd9jG2bNnFRMTI09PT3l7eysuLk5lZWUONYcOHdKgQYPk4eGhoKAgzZs377Je1q9fr65du8rDw0O9evXSli1bGuaggWvQo3eIfO7wq3eynT/v7DYB4JbQpM9YnTt3Tvfff7+GDh2qjz/+WHfccYeOHz+utm3bGjXz5s3T4sWLtXr1agUHB2vmzJmKiorSF198IQ8PD0lSTEyMzpw5o6ysLFVVVenpp5/WhAkTlJ6eLkmy2WyKjIxURESE0tLSdPjwYT3zzDPy9vbWhAkTJEm7d+/WuHHjlJKSoocffljp6emKjo7WgQMH1LNnz8b/cYD/c+bMmSuebVmf8GDjNAMAt7gmHazefPNNBQUFaeXKlcay4OBg4892u10LFy7UjBkz9Oijj0qS
PvjgA/n5+SkjI0Njx47V0aNHlZmZqf379ys0NFSStGTJEj300EN6++23FRgYqDVr1qiyslIrVqyQm5ubevToofz8fC1YsMAIVosWLdLw4cM1depUSdLcuXOVlZWlpUuXKi0trc7+KyoqVFFRYczbbDZzfyAAANCkNOlLgRs3blRoaKhGjx4tX19f3XPPPXrvvfeM8RMnTshqtSoiIsJY5uXlpbCwMOXk5EiScnJy5O3tbYQqSYqIiJCLi4v27t1r1AwePFhubm5GTVRUlAoLC3Xu3Dmj5tL91NbU7qcuKSkp8vLyMqagoKAb+DUAAEBT16SD1d///nctW7ZMnTt31tatWzVx4kQ9//zzWr16tSTJarVKkvz8HJ/m8fPzM8asVqt8fX0dxl1dXeXj4+NQU9c2Lt3Hj9XUjtdl+vTpKi0tNaZTp05d0/EDAIDmpUlfCqypqVFoaKhef/11SdI999yjI0eOKC0tTbGxsU7u7src3d3l7u7u7DYAAEAjadLBKiAgQN27d3dY1q1bN/3pT3+SJPn7+0uSioqKFBAQYNQUFRUpJCTEqCkuLnbYxsWLF3X27FljfX9/fxUVFTnU1M5fqaZ2HAB69A7RmTNn6q3hfVjAza1JB6v7779fhYWFDsu+/PJLdezYUdK/bmT39/dXdna2EaRsNpv27t2riRMnSpLCw8NVUlKivLw89e3bV5K0fft21dTUKCwszKh5+eWXVVVVpZYtW0qSsrKy1KVLF+MJxPDwcGVnZysxMdHoJSsrS+Hh4Q12/ACal6t5QpP3YQE3tyZ9j9WUKVO0Z88evf766/rqq6+Unp6u5cuXKz4+XpJksViUmJioV199VRs3btThw4f11FNPKTAwUNHR0ZL+dYZr+PDhGj9+vPbt26fPP/9cCQkJGjt2rAIDAyVJTzzxhNzc3BQXF6eCggKtW7dOixYtUlJSktHL5MmTlZmZqfnz5+vYsWOaPXu2cnNzlZCQ0Oi/CwAAaJqa9Bmrfv36acOGDZo+fbrmzJmj4OBgLVy4UDExMUbNtGnTVF5ergkTJqikpEQDBw5UZmam8Q4rSVqzZo0SEhI0bNgwubi4aNSoUVq8eLEx7uXlpW3btik+Pl59+/ZV+/btlZycbLxqQZIGDBig9PR0zZgxQy+99JI6d+6sjIwM3mEFAAAMTTpYSdLDDz+shx9++EfHLRaL5syZozlz5vxojY+Pj/Ey0B/Tu3dvffbZZ/XWjB49WqNHj66/YQAAcMtq0pcCAQAAmhOCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgkib/5nXgVtajd4jOnDlTb43t/PlG6gYAcCUEK6AJO3PmjCJfy6i3Zn3Cg43TDADgirgUCAAAYBLOWAHAVbCdL5PPHX5XqOGyLHCrI1gBwFWw19RwWRbAFXEpEAAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTNKtg9cYbb8hisSgxMdFYduHCBcXHx6tdu3a6/fbbNWrUKBUVFTmsd/LkSY0cOVK33XabfH19NXXqVF28eNGhZseOHbr33nvl7u6uu+++W6tWrbps/6mpqerUqZM8PDwUFhamffv2NcRhAgCAZqrZBKv9+/frd7/7nXr37u2wfMqUKfroo4+0fv167dy5U6dPn9Zjjz1mjFdXV2vkyJGqrKzU7t27tXr1aq1atUrJyclGzYkTJzRy5EgNHTpU+fn5SkxM1LPPPqutW7caNevWrVNSUpJmzZqlAwcOqE+fPoqKilJxcXHDHzwAAGgWmkWwKisrU0xMjN577z21bdvWWF5aWqrf//73WrBggR588EH17dtXK1eu1O7du7Vnzx5J0rZt2/TFF1/oj3/8
o0JCQjRixAjNnTtXqampqqyslCSlpaUpODhY8+fPV7du3ZSQkKDHH39c77zzjrGvBQsWaPz48Xr66afVvXt3paWl6bbbbtOKFSt+tO+KigrZbDaHCQAA3LyaRbCKj4/XyJEjFRER4bA8Ly9PVVVVDsu7du2qDh06KCcnR5KUk5OjXr16yc/Pz6iJioqSzWZTQUGBUfPv246KijK2UVlZqby8PIcaFxcXRUREGDV1SUlJkZeXlzEFBQVd5y8AAACagyYfrNauXasDBw4oJSXlsjGr1So3Nzd5e3s7LPfz85PVajVqLg1VteO1Y/XV2Gw2/fDDD/rHP/6h6urqOmtqt1GX6dOnq7S01JhOnTp1dQcNAACaJVdnN1CfU6dOafLkycrKypKHh4ez27lm7u7ucnd3d3YbAACgkTTpM1Z5eXkqLi7WvffeK1dXV7m6umrnzp1avHixXF1d5efnp8rKSpWUlDisV1RUJH9/f0mSv7//ZU8J1s5fqcbT01OtWrVS+/bt1aJFizprarcBAADQpIPVsGHDdPjwYeXn5xtTaGioYmJijD+3bNlS2dnZxjqFhYU6efKkwsPDJUnh4eE6fPiww9N7WVlZ8vT0VPfu3Y2aS7dRW1O7DTc3N/Xt29ehpqamRtnZ2UYNAABAk74U2KZNG/Xs2dNhWevWrdWuXTtjeVxcnJKSkuTj4yNPT09NmjRJ4eHhuu+++yRJkZGR6t69u5588knNmzdPVqtVM2bMUHx8vHGZ7rnnntPSpUs1bdo0PfPMM9q+fbs+/PBDbd682dhvUlKSYmNjFRoaqv79+2vhwoUqLy/X008/3Ui/BgAAaOqadLC6Gu+8845cXFw0atQoVVRUKCoqSu+++64x3qJFC23atEkTJ05UeHi4WrdurdjYWM2ZM8eoCQ4O1ubNmzVlyhQtWrRId955p95//31FRUUZNWPGjNH333+v5ORkWa1WhYSEKDMz87Ib2gEAwK2r2QWrHTt2OMx7eHgoNTVVqampP7pOx44dtWXLlnq3O2TIEB08eLDemoSEBCUkJFx1rwAA4NbSpO+xAgAAaE4IVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASZrdm9eBm0WP3iE6c+ZMvTW28+cbqRsAgBkIVoCTnDlzRpGvZdRbsz7hwcZpBgBgCoIV0AA4GwUAtyaCFdAAOBsFALcmbl4HAAAwCcEKAADAJAQrAAAAk3CPFQA0Itv5Mvnc4VdvzT9/uKDbWnnUWxMQEKCCQ/kmdgbADAQrAGhE9pqaq3qwIXJBZr01216ONq8pAKbhUiAAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEbwUCQDN0NR9z5kPNQOMjWAFAM3Q1H3PmQ81A4+NSIAAAgEmafLBKSUlRv3791KZNG/n6+io6OlqFhYUONRcuXFB8fLzatWun22+/XaNGjVJRUZFDzcmTJzVy5Ejddttt8vX11dSpU3Xx4kWHmh07dujee++Vu7u77r77bq1ateqyflJTU9WpUyd5eHgoLCxM+/btM/2YAQBA89Tkg9XOnTsVHx+vPXv2KCsrS1VVVYqMjFR5eblRM2XKFH300Udav369du7cqdOnT+uxxx4zxqurqzVy5EhVVlZq9+7dWr16tVatWqXk5GSj5sSJExo5cqSGDh2q/Px8JSYm6tlnn9XWrVuNmnXr1ikpKUmzZs3SgQMH1KdPH0VFRam4uLhxfgwAANCkNfl7rDIzMx3mV61aJV9fX+Xl5Wnw4MEqLS3V73//e6Wnp+vBBx+UJK1cuVLdunXTnj17dN9992nbtm364osv9Mknn8jPz08hISGaO3euXnjhBc2ePVtubm5KS0tTcHCw5s+fL0nq1q2b/vrXv+qdd95RVFSUJGnBggUaP368nn76aUlSWlqa
Nm/erBUrVujFF19sxF8FAAA0RU3+jNW/Ky0tlST5+PhIkvLy8lRVVaWIiAijpmvXrurQoYNycnIkSTk5OerVq5f8/P7/J2iioqJks9lUUFBg1Fy6jdqa2m1UVlYqLy/PocbFxUURERFGzb+rqKiQzWZzmAAAwM2rWQWrmpoaJSYm6v7771fPnj0lSVarVW5ubvL29nao9fPzk9VqNWouDVW147Vj9dXYbDb98MMP+sc//qHq6uo6a2q38e9SUlLk5eVlTEFBQdd34AAAoFloVsEqPj5eR44c0dq1a53dylWZPn26SktLjenUqVPObgkAADSgJn+PVa2EhARt2rRJu3bt0p133mks9/f3V2VlpUpKShzOWhUVFcnf39+o+fen92qfGry05t+fJCwqKpKnp6datWqlFi1aqEWLFnXW1G7j37m7u8vd3f36DhhO0aN3iM6cOVNvDS9dBAD8mCYfrOx2uyZNmqQNGzZox44dCg4Odhjv27evWrZsqezsbI0aNUqSVFhYqJMnTyo8PFySFB4ertdee03FxcXy9fWVJGVlZcnT01Pdu3c3arZs2eKw7aysLGMbbm5u6tu3r7KzsxUdHS3pX5cms7OzlZCQ0GDHj8Z15swZXroIALhuTT5YxcfHKz09XX/5y1/Upk0b434mLy8vtWrVSl5eXoqLi1NSUpJ8fHzk6empSZMmKTw8XPfdd58kKTIyUt27d9eTTz6pefPmyWq1asaMGYqPjzfOKD333HNaunSppk2bpmeeeUbbt2/Xhx9+qM2bNxu9JCUlKTY2VqGhoerfv78WLlyo8vJy4ylBAABwa2vywWrZsmWSpCFDhjgsX7lypX79619Lkt555x25uLho1KhRqqioUFRUlN59912jtkWLFtq0aZMmTpyo8PBwtW7dWrGxsZozZ45RExwcrM2bN2vKlClatGiR7rzzTr3//vvGqxYkacyYMfr++++VnJwsq9WqkJAQZWZmXnZDOwAAuDU1+WBlt9uvWOPh4aHU1FSlpqb+aE3Hjh0vu9T374YMGaKDBw/WW5OQkMClPwAAUKdm9VQgAABAU9bkz1gBTY3tfJl87qj/8q/t/PlG6gYA0JQQrIBrZK+pueKTg+sTHmycZgAATQqXAgEAAExCsAIAADAJwQoAAMAk3GMFADepq3nQgk80AeYiWAHATepqHrT4n+cjCF+AiQhWAHALu5rwxfcxgavHPVYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACbhzesAgHrxzUHg6hGsAAD14rM3wNUjWAEAbhhntYB/IVgBAG4YZ7WAf+HmdQAAAJMQrAAAAEzCpUAAQKPgPizcCghWAIBGwX1YuBVwKRAAAMAknLECADQZXC5Ec0ewAgA0GVwuRHNHsAIANCuc1UJTRrACADQrnNVCU0awAgDcdDirBWchWF2j1NRUvfXWW7JarerTp4+WLFmi/v37O7stAMAlruas1v88H0H4gukIVtdg3bp1SkpKUlpamsLCwrRw4UJFRUWpsLBQvr6+zm4PAHANCF9oCASra7BgwQKNHz9eTz/9tCQpLS1Nmzdv1ooVK/Tiiy86uTupR+8QnTlzpt4a/g8AAK4e4QvXimB1lSorK5WXl6fp06cby1xcXBQREaGcnJw616moqFBFRYUxX1paKkmy2WwN0uPp777Tg8n/XW/N9jnjGmz/NwN7TY2qfiivv8Zup4Yaaqgx1FRXa+iMNfXWZEz7udq2u6Pemn9eqNBtHu5NpsbP31/7cj6vt+ZWUfv3pt1uv3KxHVflu+++s0uy796922H51KlT7f37969znVmzZtklMTExMTExMd0E06lTp66YFzhj1YCmT5+upKQkY76mpkZnz55Vu3btZLFYTN2XzWZTUFCQTp06JU9PT1O33dRwrDenW+lYpVvreDnWm9etcrx2u13nz59XYGDgFWsJVlepffv2atGi
hYqKihyWFxUVyd/fv8513N3d5e7ueKrV29u7oVqUJHl6et7U/+O+FMd6c7qVjlW6tY6XY7153QrH6+XldVV1fIT5Krm5ualv377Kzs42ltXU1Cg7O1vh4eFO7AwAADQVnLG6BklJSYqNjVVoaKj69++vhQsXqry83HhKEAAA3NoIVtdgzJgx+v7775WcnCyr1aqQkBBlZmbKz6/+x2wbg7u7u2bNmnXZpcebEcd6c7qVjlW6tY6XY7153WrHezUsdvvVPDsIAACAK+EeKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBKubQGpqqjp16iQPDw+FhYVp3759zm6pQezatUuPPPKIAgMDZbFYlJGR4eyWGkxKSor69eunNm3ayNfXV9HR0SosLHR2Ww1i2bJl6t27t/GCwfDwcH388cfObqtRvPHGG7JYLEpMTHR2Kw1i9uzZslgsDlPXrl2d3VaD+e677/SrX/1K7dq1U6tWrdSrVy/l5uY6uy3TderU6bJ/rhaLRfHx8c5urUkgWDVz69atU1JSkmbNmqUDBw6oT58+ioqKUnFxsbNbM115ebn69Omj1NRUZ7fS4Hbu3Kn4+Hjt2bNHWVlZqqqqUmRkpMrL6/8YbHN055136o033lBeXp5yc3P14IMP6tFHH1VBQYGzW2tQ+/fv1+9+9zv17t3b2a00qB49eujMmTPG9Ne//tXZLTWIc+fO6f7771fLli318ccf64svvtD8+fPVtm1bZ7dmuv379zv8M83KypIkjR492smdNRE3/nliOFP//v3t8fHxxnx1dbU9MDDQnpKS4sSuGp4k+4YNG5zdRqMpLi62S7Lv3LnT2a00irZt29rff/99Z7fRYM6fP2/v3LmzPSsry/7AAw/YJ0+e7OyWGsSsWbPsffr0cXYbjeKFF16wDxw40NltOMXkyZPtd911l72mpsbZrTQJnLFqxiorK5WXl6eIiAhjmYuLiyIiIpSTk+PEzmC20tJSSZKPj4+TO2lY1dXVWrt2rcrLy2/qT0XFx8dr5MiRDv/u3qyOHz+uwMBA/fSnP1VMTIxOnjzp7JYaxMaNGxUaGqrRo0fL19dX99xzj9577z1nt9XgKisr9cc//lHPPPOMLBaLs9tpEghWzdg//vEPVVdXX/bmdz8/P1mtVid1BbPV1NQoMTFR999/v3r27OnsdhrE4cOHdfvtt8vd3V3PPfecNmzYoO7duzu7rQaxdu1aHThwQCkpKc5upcGFhYVp1apVyszM1LJly3TixAkNGjRI58+fd3Zrpvv73/+uZcuWqXPnztq6dasmTpyo559/XqtXr3Z2aw0qIyNDJSUl+vWvf+3sVpoMPmkDNHHx8fE6cuTITXtviiR16dJF+fn5Ki0t1f/8z/8oNjZWO3fuvOnC1alTpzR58mRlZWXJw8PD2e00uBEjRhh/7t27t8LCwtSxY0d9+OGHiouLc2Jn5qupqVFoaKhef/11SdI999yjI0eOKC0tTbGxsU7uruH8/ve/14gRIxQYGOjsVpoMzlg1Y+3bt1eLFi1UVFTksLyoqEj+/v5O6gpmSkhI0KZNm/Tpp5/qzjvvdHY7DcbNzU133323+vbtq5SUFPXp00eLFi1ydlumy8vLU3Fxse699165urrK1dVVO3fu1OLFi+Xq6qrq6mpnt9igvL299bOf/UxfffWVs1sxXUBAwGX/IdCtW7eb9tKnJH377bf65JNP9Oyzzzq7lSaFYNWMubm5qW/fvsrOzjaW1dTUKDs7+6a+P+VWYLfblZCQoA0bNmj79u0KDg52dkuNqqamRhUVFc5uw3TDhg3T4cOHlZ+fb0yhoaGKiYlRfn6+WrRo4ewWG1RZWZm+/vprBQQEOLsV091///2XvRLlyy+/VMeOHZ3UUcNbuXKlfH19NXLkSGe30qRwKbCZS0pKUmxsrEJDQ9W/f38tXLhQ5eXlevrpp53dmunKysoc/kv3xIkTys/Pl4+Pjzp06ODEzswXHx+v
9PR0/eUvf1GbNm2Me+a8vLzUqlUrJ3dnrunTp2vEiBHq0KGDzp8/r/T0dO3YsUNbt251dmuma9OmzWX3ybVu3Vrt2rW7Ke+f+6//+i898sgj6tixo06fPq1Zs2apRYsWGjdunLNbM92UKVM0YMAAvf766/rlL3+pffv2afny5Vq+fLmzW2sQNTU1WrlypWJjY+XqSpRw4OzHEnHjlixZYu/QoYPdzc3N3r9/f/uePXuc3VKD+PTTT+2SLptiY2Od3Zrp6jpOSfaVK1c6uzXTPfPMM/aOHTva3dzc7HfccYd92LBh9m3btjm7rUZzM79uYcyYMfaAgAC7m5ub/Sc/+Yl9zJgx9q+++srZbTWYjz76yN6zZ0+7u7u7vWvXrvbly5c7u6UGs3XrVrske2FhobNbaXIsdrvd7pxIBwAAcHPhHisAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwC4Ad98840sFovy8/Od3QqAJoA3rwPADaiurtb333+v9u3b8800AAQrALhelZWVcnNzc3YbAJoQLgUCwP8ZMmSIEhISlJCQIC8vL7Vv314zZ85U7X9/durUSXPnztVTTz0lT09PTZgwoc5LgQUFBXr44Yfl6empNm3aaNCgQfr666+N8ffff1/dunWTh4eHunbtqnfffbexDxVAA+G8NQBcYvXq1YqLi9O+ffuUm5urCRMmqEOHDho/frwk6e2331ZycrJmzZpV5/rfffedBg8erCFDhmj79u3y9PTU559/rosXL0qS1qxZo+TkZC1dulT33HOPDh48qPHjx6t169aKjY1ttOME0DC4FAgA/2fIkCEqLi5WQUGBLBaLJOnFF1/Uxo0b9cUXX6hTp0665557tGHDBmOdb775RsHBwTp48KBCQkL00ksvae3atSosLFTLli0v28fdd9+tuXPnaty4ccayV199VVu2bNHu3bsb/iABNCguBQLAJe677z4jVElSeHi4jh8/rurqaklSaGhovevn5+dr0KBBdYaq8vJyff3114qLi9Ptt99uTK+++qrDpUIAzReXAgHgGrRu3bre8VatWv3oWFlZmSTpvffeU1hYmMNYixYtbrw5AE5HsAKAS+zdu9dhfs+ePercufNVB5/evXtr9erVqqqquuyslZ+fnwIDA/X3v/9dMTExpvUMoOngUiAAXOLkyZNKSkpSYWGh/vu//1tLlizR5MmTr3r9hIQE2Ww2jR07Vrm5uTp+/Lj+8Ic/qLCwUJL0yiuvKCUlRYsXL9aXX36pw4cPa+XKlVqwYEFDHRKARsQZKwC4xFNPPaUffvhB/fv3V4sWLTR58mRNmDDhqtdv166dtm/frqlTp+qBBx5QixYtFBISovvvv1+S9Oyzz+q2227TW2+9palTp6p169bq1auXEhMTG+iIADQmngoEgP8zZMgQhYSEaOHChc5uBUAzxaVAAAAAkxCsAAAATMKlQAAAAJNwxgoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMMn/B/DvmCxMM/2dAAAAAElFTkSuQmCC\n"
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Log-transform the target exactly once. Re-running this cell must not apply log1p a second\n",
+ "# time, so the frame is flagged through DataFrame.attrs after the first transformation.\n",
+ "if not mercari_df.attrs.get('price_is_log1p', False):\n",
+ "    mercari_df['price'] = np.log1p(mercari_df['price'])\n",
+ "    mercari_df.attrs['price_is_log1p'] = True\n",
+ "mercari_df['price'].head(3)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 178
+ },
+ "id": "y-8DTZNyB5zN",
+ "outputId": "8c8f110f-8157-48ba-eec1-0ed2312d7cee"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 2.397895\n",
+ "1 3.970292\n",
+ "2 2.397895\n",
+ "Name: price, dtype: float64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " price \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2.397895 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 3.970292 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2.397895 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: float64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Value distribution of the shipping flag and of the item condition grade.\n",
+ "shipping_counts = mercari_df['shipping'].value_counts()\n",
+ "condition_counts = mercari_df['item_condition_id'].value_counts()\n",
+ "print('Shipping 값 유형:\\n', shipping_counts)\n",
+ "print('item_condition_id 값 유형:\\n', condition_counts)\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "wcqg13iDB53X",
+ "outputId": "0b6baded-f99a-44ad-a5d1-c85cbca4dfc9"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Shipping 값 유형:\n",
+ " shipping\n",
+ "0 819435\n",
+ "1 663100\n",
+ "Name: count, dtype: int64\n",
+ "item_condition_id 값 유형:\n",
+ " item_condition_id\n",
+ "1 640549\n",
+ "3 432161\n",
+ "2 375479\n",
+ "4 31962\n",
+ "5 2384\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Count the rows whose description is the literal placeholder 'No description yet'.\n",
+ "boolean_cond = mercari_df['item_description'].eq('No description yet')\n",
+ "mercari_df.loc[boolean_cond, 'item_description'].count()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5u809R1OB55R",
+ "outputId": "e098117b-aa6e-4d51-efca-868a575a2791"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "np.int64(82489)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Helper called through apply(): splits a category path into its levels and returns them as a list.\n",
+ "def split_cat(category_name):\n",
+ "    \"\"\"Split a '/'-separated category path into a list of its parts.\n",
+ "\n",
+ "    Returns category_name.split('/') for a string. Any non-string value\n",
+ "    (such as the float NaN that marks a missing category) yields three\n",
+ "    'Other_Null' placeholders instead.\n",
+ "    \"\"\"\n",
+ "    # Test the type explicitly rather than wrapping split() in a bare `except:`,\n",
+ "    # which would also swallow KeyboardInterrupt and hide unrelated errors.\n",
+ "    if not isinstance(category_name, str):\n",
+ "        return ['Other_Null', 'Other_Null', 'Other_Null']\n",
+ "    return category_name.split('/')\n",
+ "\n",
+ "# Apply split_cat() to every category_name and store the three levels as new mercari_df columns.\n",
+ "(mercari_df['cat_dae'],\n",
+ " mercari_df['cat_jung'],\n",
+ " mercari_df['cat_so']) = zip(*mercari_df['category_name'].apply(split_cat))\n",
+ "\n",
+ "# Show full value counts for the top level only; the other two levels have many distinct\n",
+ "# values, so only the number of distinct values is reported.\n",
+ "print('대분류 유형 :\\n', mercari_df['cat_dae'].value_counts())\n",
+ "for level_label, level_column in (('중분류 갯수 :', 'cat_jung'), ('소분류 갯수 :', 'cat_so')):\n",
+ "    print(level_label, mercari_df[level_column].nunique())\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4WACvyR_B59K",
+ "outputId": "b2a16b1e-c99e-4918-efd7-223b5ff87eba"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "대분류 유형 :\n",
+ " cat_dae\n",
+ "Women 664385\n",
+ "Beauty 207828\n",
+ "Kids 171689\n",
+ "Electronics 122690\n",
+ "Men 93680\n",
+ "Home 67871\n",
+ "Vintage & Collectibles 46530\n",
+ "Other 45351\n",
+ "Handmade 30842\n",
+ "Sports & Outdoors 25342\n",
+ "Other_Null 6327\n",
+ "Name: count, dtype: int64\n",
+ "중분류 갯수 : 114\n",
+ "소분류 갯수 : 871\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Replace missing values in the text / categorical columns with the 'Other_Null' placeholder.\n",
+ "for null_column in ('brand_name', 'category_name', 'item_description'):\n",
+ "    mercari_df[null_column] = mercari_df[null_column].fillna(value='Other_Null')\n",
+ "\n",
+ "# Null count per column; every entry should now be 0.\n",
+ "mercari_df.isnull().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 429
+ },
+ "id": "dVsh0vq_B5_y",
+ "outputId": "3a19ffd9-48ea-426d-e82f-ebda1dad0858"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "train_id 0\n",
+ "name 0\n",
+ "item_condition_id 0\n",
+ "category_name 0\n",
+ "brand_name 0\n",
+ "price 0\n",
+ "shipping 0\n",
+ "item_description 0\n",
+ "cat_dae 0\n",
+ "cat_jung 0\n",
+ "cat_so 0\n",
+ "dtype: int64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " train_id \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " name \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " item_condition_id \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " category_name \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " brand_name \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " price \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " shipping \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " item_description \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " cat_dae \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " cat_jung \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " cat_so \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: int64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Number of distinct brands and the five most frequent ones.\n",
+ "brand_counts = mercari_df['brand_name'].value_counts()\n",
+ "print('brand name 의 유형 건수 :', mercari_df['brand_name'].nunique())\n",
+ "print('brand name sample 5건 : \\n', brand_counts.head(5))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4qUP4S4JB6Hk",
+ "outputId": "c0921ed3-62d4-4ed6-fd8a-43fbbf231920"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "brand name 의 유형 건수 : 4810\n",
+ "brand name sample 5건 : \n",
+ " brand_name\n",
+ "Other_Null 632682\n",
+ "PINK 54088\n",
+ "Nike 54043\n",
+ "Victoria's Secret 48036\n",
+ "LuLaRoe 31024\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Number of distinct item names and the first seven of them.\n",
+ "name_column = mercari_df['name']\n",
+ "print('name 의 종류 갯수 :', name_column.nunique())\n",
+ "print('name sample 7건 : \\n', name_column.head(7))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UzsQfwTcB6Jd",
+ "outputId": "7b2ff271-d553-43f5-be0f-4862d2923b3b"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "name 의 종류 갯수 : 1225273\n",
+ "name sample 7건 : \n",
+ " 0 MLB Cincinnati Reds T Shirt Size XL\n",
+ "1 Razer BlackWidow Chroma Keyboard\n",
+ "2 AVA-VIV Blouse\n",
+ "3 Leather Horse Statues\n",
+ "4 24K GOLD plated rose\n",
+ "5 Bundled items requested for Ruie\n",
+ "6 Acacia pacific tides santorini top\n",
+ "Name: name, dtype: object\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Use the fully qualified option name: pandas resolves abbreviated names by pattern matching,\n",
+ "# which can break if another option with a similar name is added later.\n",
+ "pd.set_option('display.max_colwidth', 200)\n",
+ "\n",
+ "# Mean character length of item_description.\n",
+ "print('item_description 평균 문자열 개수:',mercari_df['item_description'].str.len().mean())\n",
+ "\n",
+ "mercari_df['item_description'][:2]"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 182
+ },
+ "id": "L6N1JdJjB6Lk",
+ "outputId": "7dcf43f3-c509-4b38-f967-4ea8989c2951"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "item_description 평균 문자열 개수: 145.71139703278507\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 No description yet\n",
+ "1 This keyboard is in great condition and works like it came out of the box. All of the ports are tested and work perfectly. The lights are customizable via the Razer Synapse app on your PC.\n",
+ "Name: item_description, dtype: object"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " item_description \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " No description yet \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " This keyboard is in great condition and works like it came out of the box. All of the ports are tested and work perfectly. The lights are customizable via the Razer Synapse app on your PC. \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: object "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Bag-of-words counts for the item name.\n",
+ "cnt_vec = CountVectorizer()\n",
+ "X_name = cnt_vec.fit_transform(mercari_df['name'])\n",
+ "\n",
+ "# TF-IDF features for the item description.\n",
+ "tfidf_descp = TfidfVectorizer(\n",
+ "    max_features=50000,\n",
+ "    ngram_range=(1, 3),\n",
+ "    stop_words='english',\n",
+ ")\n",
+ "X_descp = tfidf_descp.fit_transform(mercari_df['item_description'])\n",
+ "\n",
+ "print('name vectorization shape:', X_name.shape)\n",
+ "print('item_description vectorization shape:', X_descp.shape)\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oO9B-q-zB6Nj",
+ "outputId": "7bdea7b2-f9b2-451c-dffb-b8f3c3a7033a"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "name vectorization shape: (1482535, 105757)\n",
+ "item_description vectorization shape: (1482535, 50000)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.preprocessing import LabelBinarizer\n",
+ "\n",
+ "\n",
+ "def _fit_sparse_one_hot(values):\n",
+ "    \"\"\"Fit a LabelBinarizer with sparse output on `values`; return (binarizer, encoded matrix).\"\"\"\n",
+ "    binarizer = LabelBinarizer(sparse_output=True)\n",
+ "    return binarizer, binarizer.fit_transform(values)\n",
+ "\n",
+ "\n",
+ "# One-hot encode brand_name, item_condition_id and shipping as sparse matrices.\n",
+ "lb_brand_name, X_brand = _fit_sparse_one_hot(mercari_df['brand_name'])\n",
+ "lb_item_cond_id, X_item_cond_id = _fit_sparse_one_hot(mercari_df['item_condition_id'])\n",
+ "lb_shipping, X_shipping = _fit_sparse_one_hot(mercari_df['shipping'])\n",
+ "\n",
+ "# One-hot encode the three category levels (cat_dae, cat_jung, cat_so) the same way.\n",
+ "lb_cat_dae, X_cat_dae = _fit_sparse_one_hot(mercari_df['cat_dae'])\n",
+ "lb_cat_jung, X_cat_jung = _fit_sparse_one_hot(mercari_df['cat_jung'])\n",
+ "lb_cat_so, X_cat_so = _fit_sparse_one_hot(mercari_df['cat_so'])"
+ ],
+ "metadata": {
+ "id": "xvnF4fXuB6Po"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(type(X_brand), type(X_item_cond_id), type(X_shipping))\n",
+ "\n",
+ "# Each entry: a message template and the two matrices whose shapes fill it.\n",
+ "shape_reports = (\n",
+ "    ('X_brand_shape:{0}, X_item_cond_id shape:{1}', X_brand, X_item_cond_id),\n",
+ "    ('X_shipping shape:{0}, X_cat_dae shape:{1}', X_shipping, X_cat_dae),\n",
+ "    ('X_cat_jung shape:{0}, X_cat_so shape:{1}', X_cat_jung, X_cat_so),\n",
+ ")\n",
+ "for template, first_matrix, second_matrix in shape_reports:\n",
+ "    print(template.format(first_matrix.shape, second_matrix.shape))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mKSZFwqCB6Rw",
+ "outputId": "05c81ea3-bccf-4afe-b6fd-543dc251f427"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " \n",
+ "X_brand_shape:(1482535, 4810), X_item_cond_id shape:(1482535, 5)\n",
+ "X_shipping shape:(1482535, 1), X_cat_dae shape:(1482535, 11)\n",
+ "X_cat_jung shape:(1482535, 114), X_cat_so shape:(1482535, 871)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from scipy.sparse import hstack\n",
+ "import gc\n",
+ "\n",
+ "sparse_matrix_list = (\n",
+ "    X_name, X_descp, X_brand, X_item_cond_id,\n",
+ "    X_shipping, X_cat_dae, X_cat_jung, X_cat_so,\n",
+ ")\n",
+ "\n",
+ "# Column-wise concatenation of every encoded / vectorized feature matrix, converted to CSR.\n",
+ "X_features_sparse = hstack(sparse_matrix_list).tocsr()\n",
+ "print(type(X_features_sparse), X_features_sparse.shape)\n",
+ "\n",
+ "# The combined matrix takes a lot of memory, so delete it as soon as it is no longer needed.\n",
+ "del X_features_sparse\n",
+ "gc.collect()\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "EBSHz5YRB6T3",
+ "outputId": "eb6744cd-e1fa-4539-f391-6257f81adaa5"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " (1482535, 161569)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def rmsle(y, y_pred):\n",
+ "    \"\"\"Return the root mean squared logarithmic error between y and y_pred.\n",
+ "\n",
+ "    Both inputs go through np.log1p rather than np.log, so a value of 0 maps to 0 instead of -inf.\n",
+ "    \"\"\"\n",
+ "    log_diff = np.log1p(y) - np.log1p(y_pred)\n",
+ "    return np.sqrt(np.mean(np.power(log_diff, 2)))\n",
+ "\n",
+ "def evaluate_org_price(y_test, preds):\n",
+ "    \"\"\"Compute RMSLE on the original price scale.\n",
+ "\n",
+ "    Both arguments are passed through np.expm1 (the inverse of np.log1p) before being\n",
+ "    handed to rmsle(), so they are expected to be log1p-transformed prices.\n",
+ "    \"\"\"\n",
+ "    # Undo the log1p transformation on predictions and targets.\n",
+ "    predicted_price = np.expm1(preds)\n",
+ "    actual_price = np.expm1(y_test)\n",
+ "\n",
+ "    return rmsle(actual_price, predicted_price)"
+ ],
+ "metadata": {
+ "id": "JlCcx3bVB6Wa"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import gc\n",
+ "from scipy.sparse import hstack\n",
+ "\n",
+ "def model_train_predict(model, matrix_list, test_size=0.2, random_state=156):\n",
+ "    \"\"\"Train `model` on a split of the stacked features and predict the held-out part.\n",
+ "\n",
+ "    Args:\n",
+ "        model: estimator exposing fit(X, y) and predict(X).\n",
+ "        matrix_list: sparse matrices concatenated column-wise with scipy.sparse.hstack.\n",
+ "        test_size: forwarded to train_test_split (default 0.2).\n",
+ "        random_state: forwarded to train_test_split (default 156).\n",
+ "\n",
+ "    Returns:\n",
+ "        (preds, y_test): predictions for the test split and the matching target values,\n",
+ "        which come from the module-level mercari_df['price'].\n",
+ "    \"\"\"\n",
+ "    # Combine the sparse matrices column-wise and convert the result to CSR.\n",
+ "    X = hstack(matrix_list).tocsr()\n",
+ "\n",
+ "    X_train, X_test, y_train, y_test = train_test_split(\n",
+ "        X, mercari_df['price'], test_size=test_size, random_state=random_state)\n",
+ "\n",
+ "    # Fit on the training split and predict the test split.\n",
+ "    model.fit(X_train, y_train)\n",
+ "    preds = model.predict(X_test)\n",
+ "\n",
+ "    # Release the large intermediates before returning; callers only receive preds and y_test.\n",
+ "    del X, X_train, X_test, y_train\n",
+ "    gc.collect()\n",
+ "\n",
+ "    return preds, y_test"
+ ],
+ "metadata": {
+ "id": "6GOni4z4B6fi"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "linear_model = Ridge(solver=\"lsqr\", fit_intercept=False)\n",
+ "\n",
+ "# First run: every feature matrix except the item_description TF-IDF.\n",
+ "sparse_matrix_list = (\n",
+ "    X_name, X_brand, X_item_cond_id,\n",
+ "    X_shipping, X_cat_dae, X_cat_jung, X_cat_so,\n",
+ ")\n",
+ "linear_preds, y_test = model_train_predict(model=linear_model, matrix_list=sparse_matrix_list)\n",
+ "print('Item Description을 제외했을 때 rmsle 값:', evaluate_org_price(y_test, linear_preds))\n",
+ "\n",
+ "# Second run: the same matrices with the item_description TF-IDF placed first.\n",
+ "sparse_matrix_list = (X_descp,) + sparse_matrix_list\n",
+ "linear_preds, y_test = model_train_predict(model=linear_model, matrix_list=sparse_matrix_list)\n",
+ "print('Item Description을 포함한 rmsle 값:', evaluate_org_price(y_test, linear_preds))\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NQe_ycVxB6hW",
+ "outputId": "d6506169-fadc-4d2c-9986-781d0bcc1ce2"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Item Description을 제외했을 때 rmsle 값: 0.4983990938999374\n",
+ "Item Description을 포함한 rmsle 값: 0.4680432471796771\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from lightgbm import LGBMRegressor\n",
+ "\n",
+ "sparse_matrix_list = (\n",
+ "    X_descp, X_name, X_brand, X_item_cond_id,\n",
+ "    X_shipping, X_cat_dae, X_cat_jung, X_cat_so,\n",
+ ")\n",
+ "\n",
+ "lgbm_model = LGBMRegressor(\n",
+ "    n_estimators=200,\n",
+ "    learning_rate=0.5,\n",
+ "    num_leaves=125,\n",
+ "    random_state=156,\n",
+ ")\n",
+ "lgbm_preds, y_test = model_train_predict(model=lgbm_model, matrix_list=sparse_matrix_list)\n",
+ "lgbm_rmsle = evaluate_org_price(y_test, lgbm_preds)\n",
+ "print('LightGBM rmsle 값:', lgbm_rmsle)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "vGJUeJABCJSW",
+ "outputId": "464be022-2a6d-4d64-832c-998c70f80c88"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 3450.660095 seconds.\n",
+ "You can set `force_row_wise=true` to remove the overhead.\n",
+ "And if memory is not enough, you can set `force_col_wise=true`.\n",
+ "[LightGBM] [Info] Total Bins 1068323\n",
+ "[LightGBM] [Info] Number of data points in the train set: 1186028, number of used features: 65338\n",
+ "[LightGBM] [Info] Start training from score 2.979514\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "LightGBM rmsle 값: 0.4563962127849484\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Weighted blend of the two models' predictions, which are on the log1p price scale.\n",
+ "lgbm_weight, ridge_weight = 0.45, 0.55\n",
+ "preds = lgbm_preds * lgbm_weight + linear_preds * ridge_weight\n",
+ "print('LightGBM과 Ridge를 ensemble한 최종 rmsle 값:', evaluate_org_price(y_test , preds))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "u6zW8DcZCJUy",
+ "outputId": "6aa8b564-9074-4141-89a3-c92254e755f4"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "LightGBM과 Ridge를 ensemble한 최종 rmsle 값: 0.4467272727321774\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file