diff --git "a/Week13_\341\204\207\341\205\251\341\206\250\341\204\211\341\205\263\341\206\270\341\204\200\341\205\252\341\204\214\341\205\246_\341\204\214\341\205\265\341\206\253\341\204\213\341\205\260\341\204\213\341\205\265\341\204\213\341\205\243\341\206\253.ipynb" "b/Week13_\341\204\207\341\205\251\341\206\250\341\204\211\341\205\263\341\206\270\341\204\200\341\205\252\341\204\214\341\205\246_\341\204\214\341\205\265\341\206\253\341\204\213\341\205\260\341\204\213\341\205\265\341\204\213\341\205\243\341\206\253.ipynb" new file mode 100644 index 0000000..d163e0c --- /dev/null +++ "b/Week13_\341\204\207\341\205\251\341\206\250\341\204\211\341\205\263\341\206\270\341\204\200\341\205\252\341\204\214\341\205\246_\341\204\214\341\205\265\341\206\253\341\204\213\341\205\260\341\204\213\341\205\265\341\204\213\341\205\243\341\206\253.ipynb" @@ -0,0 +1,752 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "28bb1023", + "metadata": {}, + "source": [ + "# 데이터 전처리" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4ac901a9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1482535, 8)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
train_idnameitem_condition_idcategory_namebrand_namepriceshippingitem_description
00MLB Cincinnati Reds T Shirt Size XL3Men/Tops/T-shirtsNaN10.01No description yet
11Razer BlackWidow Chroma Keyboard3Electronics/Computers & Tablets/Components & P...Razer52.00This keyboard is in great condition and works ...
22AVA-VIV Blouse1Women/Tops & Blouses/BlouseTarget10.01Adorable top with a hint of lace and a key hol...
\n", + "
" + ], + "text/plain": [ + " train_id name item_condition_id \\\n", + "0 0 MLB Cincinnati Reds T Shirt Size XL 3 \n", + "1 1 Razer BlackWidow Chroma Keyboard 3 \n", + "2 2 AVA-VIV Blouse 1 \n", + "\n", + " category_name brand_name price \\\n", + "0 Men/Tops/T-shirts NaN 10.0 \n", + "1 Electronics/Computers & Tablets/Components & P... Razer 52.0 \n", + "2 Women/Tops & Blouses/Blouse Target 10.0 \n", + "\n", + " shipping item_description \n", + "0 1 No description yet \n", + "1 0 This keyboard is in great condition and works ... \n", + "2 1 Adorable top with a hint of lace and a key hol... " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import Ridge , LogisticRegression\n", + "from sklearn.model_selection import train_test_split , cross_val_score\n", + "from sklearn.feature_extraction.text import CountVectorizer , TfidfVectorizer\n", + "import pandas as pd\n", + "\n", + "mercari_df = pd.read_csv('mercari_train.tsv',sep='\\t')\n", + "print(mercari_df.shape)\n", + "mercari_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "26d90c10", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1482535 entries, 0 to 1482534\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 train_id 1482535 non-null int64 \n", + " 1 name 1482535 non-null object \n", + " 2 item_condition_id 1482535 non-null int64 \n", + " 3 category_name 1476208 non-null object \n", + " 4 brand_name 849853 non-null object \n", + " 5 price 1482535 non-null float64\n", + " 6 shipping 1482535 non-null int64 \n", + " 7 item_description 1482529 non-null object \n", + "dtypes: float64(1), int64(3), object(4)\n", + "memory usage: 90.5+ MB\n", + "None\n" + ] + } + ], + "source": [ + "print(mercari_df.info())" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "id": "83871101", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAFzCAYAAADCCShJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAxKklEQVR4nO3de1TVdb7H/xcg7JDBbygD222m1OlmWBbmtcIpJR3JaTq/bhiTa4zqeIvBpmy6qaukzKHOkTFtus7pQmv9ijmdnx6SzCyPgIRSgGXOCS8hiBls0OQifH5/NH5XW/ACbt347flYa6/l/nze3+/389lfNrz8XvYOMsYYAQAAOERwoAcAAADgT4QbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKL0CPYCfm/b2du3evVuRkZEKCgoK9HAAADhjGGPU2Ngoj8ej4OCjH58h3Jxmu3fv1sCBAwM9DAAAzli7du3SOeecc9R+ws1pFhkZKenHHdOnT58AjwYAgDNHQ0ODBg4caP8tPRrCzWl2+FRUnz59CDcAAHTD8S7r4IJiAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKHz9ggO0tLSotLTUp23YsGEKCwsLzIAAAAggwo0DlJaWatZf/kuW5zxJknf3N8qeKY0YMSLAIwMA4PQj3DiE5TlP/eKGBHoYAAAEHNfcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARwlouDl06JAeffRRxcXFKTw8XOedd54WLlyo9vZ2u8YYo/nz58vj8Sg8PFzjxo1TRUWFz3qam5s1e/ZsRUdHKyIiQlOmTNG3337rU1NXV6fU1FRZliXLspSamqr6+nqfmp07d+rGG29URESEoqOjNWfOHLW0tPjUlJWVKTExUeHh4RowYIAWLlwoY4x/XxgAANBtAQ03zzzzjJYvX67s7Gx9+eWXWrx4sZ599lktXbrUrlm8eLGysrKUnZ2t4uJiud1uTZgwQY2NjXZNenq6cnNzlZOTo/Xr12v//v1KTk5WW1ubXZOSkqLS0lLl5eUpLy9PpaWlSk1Ntfvb2to0efJkHThwQOvXr1dOTo7effddzZ07165paGjQhAkT5PF4VFxcrKVLl2rJkiXKyso6xa8UAAA4YSaAJk+ebH7/+9/7tN18883mzjvvNMYY097ebtxut3n66aft/qamJmNZllm+fLkxxpj6+noTGhpqcnJy7JqqqioTHBxs8vLyjDHGbNmyxUgyhYWFdk1BQYGRZL766itjjDGrVq0ywcHBpqqqyq55++23jcvlMl6v1xhjzLJly4xlWaapqcmuyczMNB6Px7S3t5/QnL1er5Fkr9MfioqKzPh5L5nbVmwwt63YYMbPe8kUFRX5bf0AAPQEJ/o3NKBHbq6++mqtWbNGX3/9tSTp888/1/r16/XrX/9aklRZWamamholJSXZy7hcLiUmJmrDhg2SpJKSErW2tvrUeDwexcfH2zUFBQWyLEsjR460a0aNGiXLsnxq4uPj5fF47JobbrhBzc3NKikpsWsSExPlcrl8anbv3q3t27d3Osfm5mY1NDT4PAAAwKnTK5Abf+ihh+T1enXxxRcrJCREbW1teuqpp3THHXdIkmpqaiRJsbGxPsvFxsZqx44ddk1YWJiioqI61BxevqamRjExMR22HxMT4
1Nz5HaioqIUFhbmUzN48OAO2zncFxcX12EbmZmZWrBgwfFfDAAA4BcBPXLzzjvv6I033tBbb72lTZs26fXXX9eSJUv0+uuv+9QFBQX5PDfGdGg70pE1ndX7o8b882Lio43n4YcfltfrtR+7du065rgBAMDJCeiRmz/+8Y+aN2+ebr/9dknS0KFDtWPHDmVmZuquu+6S2+2W9ONRkf79+9vL1dbW2kdM3G63WlpaVFdX53P0pra2VmPGjLFr9uzZ02H7e/fu9VlPUVGRT39dXZ1aW1t9ag4fxfnpdqSOR5cOc7lcPqexAADAqRXQIzc//PCDgoN9hxASEmLfCh4XFye32638/Hy7v6WlRevWrbODS0JCgkJDQ31qqqurVV5ebteMHj1aXq9XGzdutGuKiork9Xp9asrLy1VdXW3XrF69Wi6XSwkJCXbNJ5984nN7+OrVq+XxeDqcrgIAAIER0HBz44036qmnntLKlSu1fft25ebmKisrS7/97W8l/XiqJz09XYsWLVJubq7Ky8s1bdo09e7dWykpKZIky7I0ffp0zZ07V2vWrNHmzZt15513aujQoRo/frwk6ZJLLtHEiROVlpamwsJCFRYWKi0tTcnJybroooskSUlJSRoyZIhSU1O1efNmrVmzRg888IDS0tLUp08fST/eTu5yuTRt2jSVl5crNzdXixYtUkZGxnFPkwEAgNMjoKelli5dqscee0wzZsxQbW2tPB6P7r33Xj3++ON2zYMPPqiDBw9qxowZqqur08iRI7V69WpFRkbaNc8995x69eqlW2+9VQcPHtT111+v1157TSEhIXbNm2++qTlz5th3VU2ZMkXZ2dl2f0hIiFauXKkZM2Zo7NixCg8PV0pKipYsWWLXWJal/Px8zZw5U8OHD1dUVJQyMjKUkZFxKl8mAADQBUHG8PG6p1NDQ4Msy5LX67WPCJ2sjRs36pHcMvWLGyJJ2le5RU/9dqhGjBjhl/UDANATnOjfUL5bCgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOArhBgAAOErAw01VVZXuvPNO9evXT71799awYcNUUlJi9xtjNH/+fHk8HoWHh2vcuHGqqKjwWUdzc7Nmz56t6OhoRUREaMqUKfr22299aurq6pSamirLsmRZllJTU1VfX+9Ts3PnTt14442KiIhQdHS05syZo5aWFp+asrIyJSYmKjw8XAMGDNDChQtljPHviwIAALotoOGmrq5OY8eOVWhoqP7nf/5HW7Zs0Z///GedffbZds3ixYuVlZWl7OxsFRcXy+12a8KECWpsbLRr0tPTlZubq5ycHK1fv1779+9XcnKy2tra7JqUlBSVlpYqLy9PeXl5Ki0tVWpqqt3f1tamyZMn68CBA1q/fr1ycnL07rvvau7cuXZNQ0ODJkyYII/Ho+LiYi1dulRLlixRVlbWqX2hAADAiTMB9NBDD5mrr776qP3t7e3G7Xabp59+2m5ramoylmWZ5cuXG2OMqa+vN6GhoSYnJ8euqaqqMsHBwSYvL88YY8yWLVuMJFNYWGjXFBQUGEnmq6++MsYYs2rVKhMcHGyqqqrsmrffftu4XC7j9XqNMcYsW
7bMWJZlmpqa7JrMzEzj8XhMe3v7Cc3Z6/UaSfY6/aGoqMiMn/eSuW3FBnPbig1m/LyXTFFRkd/WDwBAT3Cif0MDeuTm/fff1/Dhw3XLLbcoJiZGV1xxhf7617/a/ZWVlaqpqVFSUpLd5nK5lJiYqA0bNkiSSkpK1Nra6lPj8XgUHx9v1xQUFMiyLI0cOdKuGTVqlCzL8qmJj4+Xx+Oxa2644QY1Nzfbp8kKCgqUmJgol8vlU7N7925t37690zk2NzeroaHB5wEAAE6dgIabb775Ri+88IIuuOACffDBB7rvvvs0Z84c/e1vf5Mk1dTUSJJiY2N9louNjbX7ampqFBYWpqioqGPWxMTEdNh+TEyMT82R24mKilJYWNgxaw4/P1xzpMzMTPs6H8uyNHDgwOO8KgAA4GQENNy0t7fryiuv1KJFi3TFFVfo3nvvVVpaml544QWfuqCgIJ/nxpgObUc6sqazen/UmH9eTHy08Tz88MPyer32Y9euXcccNwAAODkBDTf9+/fXkCFDfNouueQS7dy5U5LkdrsldTwqUltbax8xcbvdamlpUV1d3TFr9uzZ02H7e/fu9ak5cjt1dXVqbW09Zk1tba2kjkeXDnO5XOrTp4/PAwAAnDoBDTdjx47V1q1bfdq+/vprDRo0SJIUFxcnt9ut/Px8u7+lpUXr1q3TmDFjJEkJCQkKDQ31qamurlZ5ebldM3r0aHm9Xm3cuNGuKSoqktfr9akpLy9XdXW1XbN69Wq5XC4lJCTYNZ988onP7eGrV6+Wx+PR4MGD/fGSAACAkxTQcPOHP/xBhYWFWrRokf7xj3/orbfe0osvvqiZM2dK+vFUT3p6uhYtWqTc3FyVl5dr2rRp6t27t1JSUiRJlmVp+vTpmjt3rtasWaPNmzfrzjvv1NChQzV+/HhJPx4NmjhxotLS0lRYWKjCwkKlpaUpOTlZF110kSQpKSlJQ4YMUWpqqjZv3qw1a9bogQceUFpamn20JSUlRS6XS9OmTVN5eblyc3O1aNEiZWRkHPc0GQAAOE1O/Y1bx/bf//3fJj4+3rhcLnPxxRebF1980ae/vb3dPPHEE8btdhuXy2WuvfZaU1ZW5lNz8OBBM2vWLNO3b18THh5ukpOTzc6dO31q9u3bZ6ZOnWoiIyNNZGSkmTp1qqmrq/Op2bFjh5k8ebIJDw83ffv2NbNmzfK57dsYY7744gtzzTXXGJfLZdxut5k/f/4J3wZuDLeCAwDQXSf6NzTIGD5e93RqaGiQZVnyer1+u/5m48aNeiS3TP3ifrx+aV/lFj3126EaMWKEX9YPAEBPcKJ/QwP+9QsAAAD+RLgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACO0q1wc95552nfvn0d2uvr63Xeeeed9KAAAAC6q1vhZvv27Wpra+vQ3tzcrKqqqpMeFAAAQHf16krx+++/b//7gw8+kGVZ9vO2tjatWbNGgwcP9tvgAAAAuqpL4eamm26SJAUFBemuu+7y6QsNDdXgwYP15z//2W+DAwAA6KouhZv29nZJUlxcnIqLixUdHX1KBgUAANBdXQo3h1VWVvp7HAAAAH7RrXAjSWvWrNGaNWtUW1trH9E57JVXXjnpgQEAAHRHt8LNggULtHDhQg0fPlz9+/dXUFCQv8cFAADQLd0KN8uXL9drr72m1NRUf48HAADgpHTrc25aWlo0ZswYf48FAADgpHUr3Nx999166623/D0WAACAk9at01JNTU168cUX9eGHH+qyyy5TaGioT39WVpZfBgcAANBV3Qo3X3zxhYYNGyZJKi8v9+nj4mIAABBI3Qo3a9eu9fc4AAAA/KJb19wAAAD0VN06cvOrX/3qmKefPvroo24PCAAA4GR0K9wcvt7ms
NbWVpWWlqq8vLzDF2oCAACcTt0KN88991yn7fPnz9f+/ftPakAAAAAnw6/X3Nx55518rxQAAAgov4abgoICnXXWWf5cJQAAQJd067TUzTff7PPcGKPq6mp99tlneuyxx/wyMAAAgO7oVrixLMvneXBwsC666CItXLhQSUlJfhkYAABAd3Qr3Lz66qv+HgcAAIBfdCvcHFZSUqIvv/xSQUFBGjJkiK644gp/jQsAAKBbuhVuamtrdfvtt+vjjz/W2WefLWOMvF6vfvWrXyknJ0e//OUv/T1OAACAE9Ktu6Vmz56thoYGVVRU6Pvvv1ddXZ3Ky8vV0NCgOXPm+HuMAAAAJ6xbR27y8vL04Ycf6pJLLrHbhgwZor/85S9cUAwAAAKqW0du2tvbFRoa2qE9NDRU7e3tJz0oAACA7upWuLnuuut0//33a/fu3XZbVVWV/vCHP+j666/32+AAAAC6qlvhJjs7W42NjRo8eLDOP/98/cu//Ivi4uLU2NiopUuX+nuMAAAAJ6xb19wMHDhQmzZtUn5+vr766isZYzRkyBCNHz/e3+MDAADoki4dufnoo480ZMgQNTQ0SJImTJig2bNna86cObrqqqt06aWX6tNPPz0lAwUAADgRXQo3zz//vNLS0tSnT58OfZZl6d5771VWVpbfBgcAANBVXQo3n3/+uSZOnHjU/qSkJJWUlJz0oAAAALqrS+Fmz549nd4CflivXr20d+/ekx4UAABAd3Up3AwYMEBlZWVH7f/iiy/Uv3//kx4UAABAd3Up3Pz617/W448/rqampg59Bw8e1BNPPKHk5ORuDSQzM1NBQUFKT0+324wxmj9/vjwej8LDwzVu3DhVVFT4LNfc3KzZs2crOjpaERERmjJlir799lufmrq6OqWmpsqyLFmWpdTUVNXX1/vU7Ny5UzfeeKMiIiIUHR2tOXPmqKWlxaemrKxMiYmJCg8P14ABA7Rw4UIZY7o1XwAAcGp0Kdw8+uij+v7773XhhRdq8eLF+q//+i+9//77euaZZ3TRRRfp+++/1yOPPNLlQRQXF+vFF1/UZZdd5tO+ePFiZWVlKTs7W8XFxXK73ZowYYIaGxvtmvT0dOXm5ionJ0fr16/X/v37lZycrLa2NrsmJSVFpaWlysvLU15enkpLS5Wammr3t7W1afLkyTpw4IDWr1+vnJwcvfvuu5o7d65d09DQoAkTJsjj8ai4uFhLly7VkiVLuIAaAICexnTR9u3bzaRJk0xwcLAJCgoyQUFBJjg42EyaNMlUVlZ2dXWmsbHRXHDBBSY/P98kJiaa+++/3xhjTHt7u3G73ebpp5+2a5uamoxlWWb58uXGGGPq6+tNaGioycnJsWuqqqpMcHCwycvLM8YYs2XLFiPJFBYW2jUFBQVGkvnqq6+MMcasWrXKBAcHm6qqKrvm7bffNi6Xy3i9XmOMMcuWLTOWZZmmpia7JjMz03g8HtPe3n7C8/V6vUaSvV5/KCoqMuPnvWRuW7HB3LZigxk/7yVTVFTkt/UDANATnOjf0C5/QvGgQYO0atUqfffddyoqKlJhYaG+++47rVq1SoMHD+5yuJo5c6YmT57c4QMAKysrVVNT4/NFnC6XS4mJidqwYYMkqaSkRK2trT41Ho9H8fHxdk1BQYEsy9LIkSPtmlGjRsmyLJ+a+Ph4eTweu+aGG25Qc3OzffdXQUGBEhMT5XK5fGp2796t7du3H3V+zc3Namho8HkAAIBTp1ufUCxJUVFRuuqqq05q4zk5Odq0aZOKi4s79NXU1EiSYmNjfdpjY2O1Y8cOuyYsLExRUVEdag4vX1NTo5iYmA7rj4mJ8ak5cjtRUVEKCwvzqTkyvB1epqamRnFxcZ3OMTMzUwsWLOi0DwAA+F+3vlvKH3bt2qX7779fb7zxhs4666yj1gUFBfk8N8Z0aDvSkTWd1fujxvzzYuJjjefhhx+W1+u1H7t27Trm2AEAwMkJWLgpKSlRbW2tEhIS1KtXL
/Xq1Uvr1q3Tf/zHf6hXr14+R0V+qra21u5zu91qaWlRXV3dMWv27NnTYft79+71qTlyO3V1dWptbT1mTW1traSOR5d+yuVyqU+fPj4PAABw6gQs3Fx//fUqKytTaWmp/Rg+fLimTp2q0tJSnXfeeXK73crPz7eXaWlp0bp16zRmzBhJUkJCgkJDQ31qqqurVV5ebteMHj1aXq9XGzdutGuKiork9Xp9asrLy1VdXW3XrF69Wi6XSwkJCXbNJ5984nN7+OrVq+XxeLp1rREAADg1un3NzcmKjIxUfHy8T1tERIT69etnt6enp2vRokW64IILdMEFF2jRokXq3bu3UlJSJP34fVbTp0/X3Llz1a9fP/Xt21cPPPCAhg4dal+gfMkll2jixIlKS0vTihUrJEn33HOPkpOTddFFF0n68WsjhgwZotTUVD377LP6/vvv9cADD/h8j1ZKSooWLFigadOm6U9/+pO2bdumRYsW6fHHHz/uaTIAAHD6BCzcnIgHH3xQBw8e1IwZM1RXV6eRI0dq9erVioyMtGuee+459erVS7feeqsOHjyo66+/Xq+99ppCQkLsmjfffFNz5syx76qaMmWKsrOz7f6QkBCtXLlSM2bM0NixYxUeHq6UlBQtWbLErrEsS/n5+Zo5c6aGDx+uqKgoZWRkKCMj4zS8EgAA4EQFGcNH7J5ODQ0NsixLXq/Xb9ffbNy4UY/klqlf3BBJ0r7KLXrqt0M1YsQIv6wfAICe4ET/hvboIzfonva2Qx2+pmLYsGEKCwsL0IgAADh9CDcO1Lhnp57b0aTYr9slSd7d3yh7pjiSAwD4WSDcOFRk7CD7NBUAAD8nAbsVHAAA4FQg3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEch3AAAAEcJaLjJzMzUVVddpcjISMXExOimm27S1q1bfWqMMZo/f748Ho/Cw8M1btw4VVRU+NQ0Nzdr9uzZio6OVkREhKZMmaJvv/3Wp6aurk6pqamyLEuWZSk1NVX19fU+NTt37tSNN96oiIgIRUdHa86cOWppafGpKSsrU2JiosLDwzVgwAAtXLhQxhj/vSgAAOCkBDTcrFu3TjNnzlRhYaHy8/N16NAhJSUl6cCBA3bN4sWLlZWVpezsbBUXF8vtdmvChAlqbGy0a9LT05Wbm6ucnBytX79e+/fvV3Jystra2uyalJQUlZaWKi8vT3l5eSotLVVqaqrd39bWpsmTJ+vAgQNav369cnJy9O6772ru3Ll2TUNDgyZMmCCPx6Pi4mItXbpUS5YsUVZW1il+pQAAwAkzPUhtba2RZNatW2eMMaa9vd243W7z9NNP2zVNTU3GsiyzfPlyY4wx9fX1JjQ01OTk5Ng1VVVVJjg42OTl5RljjNmyZYuRZAoLC+2agoICI8l89dVXxhhjVq1aZYKDg01VVZVd8/bbbxuXy2W8Xq8xxphly5YZy7JMU1OTXZOZmWk8Ho9pb28/oTl6vV4jyV6nPxQVFZnx814yt63YYG5bscFc9bs/mTGz/91+Pn7eS6aoqMhv2wMAIBBO9G9oj7rmxuv1SpL69u0rSaqsrFRNTY2SkpLsGpfLpcTERG3YsEGSVFJSotbWVp8aj8ej+Ph4u6agoECWZ
WnkyJF2zahRo2RZlk9NfHy8PB6PXXPDDTeoublZJSUldk1iYqJcLpdPze7du7V9+/ZO59Tc3KyGhgafBwAAOHV6TLgxxigjI0NXX3214uPjJUk1NTWSpNjYWJ/a2NhYu6+mpkZhYWGKioo6Zk1MTEyHbcbExPjUHLmdqKgohYWFHbPm8PPDNUfKzMy0r/OxLEsDBw48zisBAABORo8JN7NmzdIXX3yht99+u0NfUFCQz3NjTIe2Ix1Z01m9P2rMPy8mPtp4Hn74YXm9Xvuxa9euY44bAACcnB4RbmbPnq33339fa9eu1TnnnGO3u91uSR2PitTW1tpHTNxut1paWlRXV3fMmj179nTY7t69e31qjtxOXV2dWltbj1lTW1srqePRpcNcLpf69Onj8wAAAKdOQMONMUazZs3Se++9p48++khxcXE+/XFxcXK73crPz7fbWlpatG7dOo0ZM0aSlJCQoNDQUJ+a6upqlZeX2zWjR4+W1+vVxo0b7ZqioiJ5vV6fmvLyclVXV9s1q1evlsvlUkJCgl3zySef+Nwevnr1ank8Hg0ePNhPrwoAADgZAQ03M2fO1BtvvKG33npLkZGRqqmpUU1NjQ4ePCjpx1M96enpWrRokXJzc1VeXq5p06apd+/eSklJkSRZlqXp06dr7ty5WrNmjTZv3qw777xTQ4cO1fjx4yVJl1xyiSZOnKi0tDQVFhaqsLBQaWlpSk5O1kUXXSRJSkpK0pAhQ5SamqrNmzdrzZo1euCBB5SWlmYfbUlJSZHL5dK0adNUXl6u3NxcLVq0SBkZGcc9TQYAAE6PXoHc+AsvvCBJGjdunE/7q6++qmnTpkmSHnzwQR08eFAzZsxQXV2dRo4cqdWrVysyMtKuf+6559SrVy/deuutOnjwoK6//nq99tprCgkJsWvefPNNzZkzx76rasqUKcrOzrb7Q0JCtHLlSs2YMUNjx45VeHi4UlJStGTJErvGsizl5+dr5syZGj58uKKiopSRkaGMjAx/vzQAAKCbgozh43VPp4aGBlmWJa/X67frbzZu3KhHcsvUL26IJOmb//3/FGrFamD8VZKkfZVb9NRvh2rEiBF+2R4AAIFwon9De8QFxQAAAP5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI7SK9ADwKnX3nZIFRUVPm3Dhg1TWFhYgEYEAMCpQ7j5GWjcs1PP7WhS7NftkiTv7m+UPVMaMWJEgEcGAID/EW5+JiJjB6lf3JBADwMAgFOOa24AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICjEG4AAICj9Ar0AHD6tbcdUkVFRYf2YcOGKSwsLAAjAgDAfwg3P0ONe3bquR1Niv263W7z7v5G2TOlESNGBHBkAACcPMLNz1Rk7CD1ixsS6GEAAOB3XHMDAAAchXADAAAchXADAAAchXADAAAchQuKIanz28O5NRwAcCYi3EBSx9vDuTUcAHCmItzAxu3hAAAn4JobAADgKBy5Qae4BgcAcKYi3HTDsmXL9Oyzz6q6ulqXXnqpnn/+eV1zzTWBHpZfHXkNTt2ubZp5fYUuvfRSu4awAwDoiQg3XfTOO+8oPT1dy5Yt09ixY7VixQpNmjRJW7Zs0bnnnhvo4fnVT6/B8e7+Rs/llRN2AAA9HuGmi7KysjR9+nTdfffdkqTnn39eH3zwgV544QVlZmYGeHSnVlfDTmtrqyQpNDS0W89PtIZQBQD4KcJNF7S0tKikpETz5s3zaU9KStKGDRs6Xaa5u
VnNzc32c6/XK0lqaGjw27j279+v77d/qUPNB3/cRvV29Wr0KqxX8Ak9784y3urt6vWLKHubjbW7tOClL3V2/xJ7nfu2f6kQ1y90dv+B3Xp+IjUHvt+jP/w/v9LFF1/st9cTAHDyhg8f7vd1Hv7baYw5Zh3hpgu+++47tbW1KTY21qc9NjZWNTU1nS6TmZmpBQsWdGgfOHBgJ9X+taWLz7uzzJHPd3SyziPbuvr8eDUz1/6/nSwBAHCqxsZGWZZ11H7CTTcEBQX5PDfGdGg77OGHH1ZGRob9vL29Xd9//7369et31GW6qqGhQQMHDtSuXbvUp08fv6wzkJw2H8l5c3LafCTmdCZw2nwk583pVM/HGKPGxkZ5PJ5j1hFuuiA6OlohISEdjtLU1tZ2OJpzmMvlksvl8mk7++yzT8n4+vTp44g3x2FOm4/kvDk5bT4SczoTOG0+kvPmdCrnc6wjNofxIX5dEBYWpoSEBOXn5/u05+fna8yYMQEaFQAA+CmO3HRRRkaGUlNTNXz4cI0ePVovvviidu7cqfvuuy/QQwMAACLcdNltt92mffv2aeHChaqurlZ8fLxWrVqlQYMGBWxMLpdLTzzxRIfTX2cqp81Hct6cnDYfiTmdCZw2H8l5c+op8wkyx7ufCgAA4AzCNTcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDdnuGXLlikuLk5nnXWWEhIS9OmnnwZ6SJ3KzMzUVVddpcjISMXExOimm27S1q1bfWqmTZumoKAgn8eoUaN8apqbmzV79mxFR0crIiJCU6ZM0bfffns6p2KbP39+h/G63W673xij+fPny+PxKDw8XOPGjVNFRYXPOnrSfAYPHtxhPkFBQZo5c6akM2P/fPLJJ7rxxhvl8XgUFBSkv//97z79/tondXV1Sk1NlWVZsixLqampqq+vP63zaW1t1UMPPaShQ4cqIiJCHo9Hv/vd77R7926fdYwbN67Dfrv99tsDMp/jzUny389ZT5pTZ++roKAgPfvss3ZNT9pPJ/L7uqe/lwg3Z7B33nlH6enpeuSRR7R582Zdc801mjRpknbu3BnooXWwbt06zZw5U4WFhcrPz9ehQ4eUlJSkAwcO+NRNnDhR1dXV9mPVqlU+/enp6crNzVVOTo7Wr1+v/fv3Kzk5WW1tbadzOrZLL73UZ7xlZWV23+LFi5WVlaXs7GwVFxfL7XZrwoQJamxstGt60nyKi4t95nL4wypvueUWu6an758DBw7o8ssvV3Z2dqf9/tonKSkpKi0tVV5envLy8lRaWqrU1NTTOp8ffvhBmzZt0mOPPaZNmzbpvffe09dff60pU6Z0qE1LS/PZbytWrPDpP13zOd6cDvPHz1lPmtNP51JdXa1XXnlFQUFB+td//Vefup6yn07k93WPfy8ZnLFGjBhh7rvvPp+2iy++2MybNy9AIzpxtbW1RpJZt26d3XbXXXeZ3/zmN0ddpr6+3oSGhpqcnBy7raqqygQHB5u8vLxTOdxOPfHEE+byyy/vtK+9vd243W7z9NNP221NTU3GsiyzfPlyY0zPm8+R7r//fnP++eeb9vZ2Y8yZt38kmdzcXPu5v/bJli1bjCRTWFho1xQUFBhJ5quvvjpt8+nMxo0bjSSzY8cOuy0xMdHcf//9R10mUPMxpvM5+ePnrKfN6Ui/+c1vzHXXXefT1pP305G/r8+E9xJHbs5QLS0tKikpUVJSkk97UlKSNmzYEKBRnTiv1ytJ6tu3r0/7xx9/rJiYGF144YVKS0tTbW2t3VdSUqLW1lafOXs8HsXHxwdsztu2bZPH41FcXJxuv/12ffPNN5KkyspK1dTU+IzV5XIpMTHRHmtPnM9hLS0teuONN/T73//e5wtez7T981P+2icFBQWyLEsjR460a0aNGiXLsgI+T6/Xq6CgoA7fX/fmm28qOjpal156qR544AGf/133xPmc7M9ZT5zTYXv27NHKlSs1ffr0Dn09dT8d+fv6THgv8
QnFZ6jvvvtObW1tHb6wMzY2tsMXe/Y0xhhlZGTo6quvVnx8vN0+adIk3XLLLRo0aJAqKyv12GOP6brrrlNJSYlcLpdqamoUFhamqKgon/UFas4jR47U3/72N1144YXas2ePnnzySY0ZM0YVFRX2eDrbPzt27JCkHjefn/r73/+u+vp6TZs2zW470/bPkfy1T2pqahQTE9Nh/TExMQGdZ1NTk+bNm6eUlBSfLyycOnWq4uLi5Ha7VV5erocffliff/65fdqxp83HHz9nPW1OP/X6668rMjJSN998s097T91Pnf2+PhPeS4SbM9xP/1ct/fiDeGRbTzNr1ix98cUXWr9+vU/7bbfdZv87Pj5ew4cP16BBg7Ry5coOvwh+KlBznjRpkv3voUOHavTo0Tr//PP1+uuv2xdAdmf/9IR9+PLLL2vSpEnyeDx225m2f47GH/uks/pAzrO1tVW333672tvbtWzZMp++tLQ0+9/x8fG64IILNHz4cG3atElXXnmlpJ41H3/9nPWkOf3UK6+8oqlTp+qss87yae+p++lov687G09Pei9xWuoMFR0drZCQkA7ptra2tkOa7klmz56t999/X2vXrtU555xzzNr+/ftr0KBB2rZtmyTJ7XarpaVFdXV1PnU9Zc4REREaOnSotm3bZt81daz901Pns2PHDn344Ye6++67j1l3pu0ff+0Tt9utPXv2dFj/3r17AzLP1tZW3XrrraqsrFR+fr7PUZvOXHnllQoNDfXZbz1pPkfqzs9ZT53Tp59+qq1btx73vSX1jP10tN/XZ8J7iXBzhgoLC1NCQoJ9yPKw/Px8jRkzJkCjOjpjjGbNmqX33ntPH330keLi4o67zL59+7Rr1y71799fkpSQkKDQ0FCfOVdXV6u8vLxHzLm5uVlffvml+vfvbx9e/ulYW1patG7dOnusPXU+r776qmJiYjR58uRj1p1p+8df+2T06NHyer3auHGjXVNUVCSv13va53k42Gzbtk0ffvih+vXrd9xlKioq1Nraau+3njSfznTn56ynzunll19WQkKCLr/88uPWBnI/He/39RnxXjqpy5ERUDk5OSY0NNS8/PLLZsuWLSY9Pd1ERESY7du3B3poHfzbv/2bsSzLfPzxx6a6utp+/PDDD8YYYxobG83cuXPNhg0bTGVlpVm7dq0ZPXq0GTBggGloaLDXc99995lzzjnHfPjhh2bTpk3muuuuM5dffrk5dOjQaZ/T3Llzzccff2y++eYbU1hYaJKTk01kZKT9+j/99NPGsizz3nvvmbKyMnPHHXeY/v3799j5GGNMW1ubOffcc81DDz3k036m7J/GxkazefNms3nzZiPJZGVlmc2bN9t3D/lrn0ycONFcdtllpqCgwBQUFJihQ4ea5OTk0zqf1tZWM2XKFHPOOeeY0tJSn/dVc3OzMcaYf/zjH2bBggWmuLjYVFZWmpUrV5qLL77YXHHFFQGZz/Hm5M+fs54yp8O8Xq/p3bu3eeGFFzos39P20/F+XxvT899LhJsz3F/+8hczaNAgExYWZq688kqfW6t7EkmdPl599VVjjDE//PCDSUpKMr/85S9NaGioOffcc81dd91ldu7c6bOegwcPmlmzZpm+ffua8PBwk5yc3KHmdLnttttM//79TWhoqPF4PObmm282FRUVdn97e7t54oknjNvtNi6Xy1x77bWmrKzMZx09aT7GGPPBBx8YSWbr1q0+7WfK/lm7dm2nP2d33XWXMcZ/+2Tfvn1m6tSpJjIy0kRGRpqpU6eaurq60zqfysrKo76v1q5da4wxZufOnebaa681ffv2NWFhYeb88883c+bMMfv27QvIfI43J3/+nPWUOR22YsUKEx4eburr6zss39P20/F+XxvT899LQf+cCAAAgCNwzQ0AAHAUwg0AAHAUwg0AAHAUwg0AAHAUwg0AAHAUwg0AAHAUwg0AAHAUwg2An73t27crKChIpaWlgR4KAD/gQ/wA/Oy1t
bVp7969io6OVq9evQI9HAAniXAD4GetpaVFYWFhgR4GAD/itBQARxk3bpxmzZqlWbNm6eyzz1a/fv306KOP6vD/4wYPHqwnn3xS06ZNk2VZSktL6/S0VEVFhSZPnqw+ffooMjJS11xzjf7v//7P7n/11Vd1ySWX6KyzztLFF1+sZcuWne6pAjgKjr8CcJzXX39d06dPV1FRkT777DPdc889GjRokNLS0iRJzz77rB577DE9+uijnS5fVVWla6+9VuPGjdNHH32kPn366H//93916NAhSdJf//pXPfHEE8rOztYVV1yhzZs3Ky0tTREREbrrrrtO2zwBdI7TUgAcZdy4caqtrVVFRYWCgoIkSfPmzdP777+vLVu2aPDgwbriiiuUm5trL7N9+3bFxcVp8+bNGjZsmP70pz8pJydHW7duVWhoaIdtnHvuuXrmmWd0xx132G1PPvmkVq1apQ0bNpz6SQI4Jk5LAXCcUaNG2cFGkkaPHq1t27apra1NkjR8+PBjLl9aWqprrrmm02Czd+9e7dq1S9OnT9cvfvEL+/Hkk0/6nLYCEDiclgLwsxMREXHM/vDw8KP2tbe3S/rx1NTIkSN9+kJCQk5+cABOGuEGgOMUFhZ2eH7BBReccPi47LLL9Prrr6u1tbXD0ZvY2FgNGDBA33zzjaZOneq3MQPwH05LAXCcXbt2KSMjQ1u3btXbb7+tpUuX6v777z/h5WfNmqWGhgbdfvvt+uyzz7Rt2zb953/+p7Zu3SpJmj9/vjIzM/Xv//7v+vrrr1VWVqZXX31VWVlZp2pKALqAIzcAHOd3v/udDh48qBEjRigkJESzZ8/WPffcc8LL9+vXTx999JH++Mc/KjExUSEhIRo2bJjGjh0rSbr77rvVu3dvPfvss3rwwQcVERGhoUOHKj09/RTNCEBXcLcUAEcZN26chg0bpueffz7QQwEQIJyWAgAAjkK4AQAAjsJpKQAA4CgcuQEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI5CuAEAAI7y/wPGjtb5n/sQFgAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Price Column의 데이터값 분포\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "y_train_df = mercari_df['price']\n", + "plt.figure(figsize=(6,4))\n", + "sns.histplot(y_train_df,bins=100)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b4b34af4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAGwCAYAAABrUCsdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAu0lEQVR4nO3de3RU9b3//9c0lyHkkDEXkzAaBFuMYIJwgkLAFhRIoITUuk7RRkdYxaiHS4wJWhEvkSVJ5RJoQ0WhHKBcTFe/FKqiIQEVmnKPTiVAQStKkIRwZJgQiklM5veHh/1zCISLO0xCno+19lrO/rxn5r1HXbz4fPbF4vF4PAIAAMD39gNfNwAAAHCtIFgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBJ/XzfQ0TQ1Neno0aPq0qWLLBaLr9sBAACXwOPx6NSpU7Lb7frBDy48L0WwusqOHj2qmJgYX7cBAACuQEVFhW688cYLjhOsrrIuXbpI+vZfTEhIiI+7AQAAl6KmpkYxMTHGn+MXQrC6ys4u/4WEhBCsAABoZy52Gg8nrwMAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJ/H3dAIDWd1ufvqqsrGyxpmvXrtr7sfPqNAQA1yiCFdABVFZWKmnmuhZriqffe1V6AYBrGUuBAAAAJvFpsNqyZYvGjBkju90ui8WidevWNavZv3+/UlNTZbPZ1KVLFw0cOFCHDx82xuvq6jRlyhRFREQoODhYqampOnLkiNdnuFwuORwO2Ww22Ww2ORwOnTx50qvm8OHDGjNmjIKDgxUREaGMjAzV19d71ezZs0dDhgxRUFCQbrjhBs2YMUMej8e03wMAALRvPg1Wp0+f1u23364FCxacd/xf//qX7rrrLt1666364IMP9I9//EPPP/+8OnXqZNRkZmZq7dq1KiwsVGlpqWpra5WSkqLGxkajJi0tTU6nU0VFRSoqKpLT6ZTD4TDGGxsbNXr0aJ0+fVqlpaUqLCzUmjVrlJ2dbdTU1NRoxIgRstvt2rVrlwoKCjRnzhzl5+e3wi8DAADaI4unjUy5WCwWrV27Vvfee6+x74EHHlBAQIBWrFhx3ve43W5df/31WrFihe6//35J0tGjRxUTE6N33nlHycnJ2r9/v3r37q3t27drwIABkqTt27crMTFR//znPxUbG6t3331XKSkpqqiokN1ulyQVFhZq/Pjxqq6uVkhIiBYuXKhp06bp2LFjslqtkqTf/OY3Kigo0JEjR2SxWM7bY11dnerq6ozXNTU1iomJkdvtVkhIyPf+3YBLEXZ91CWdY3Xi+LGr0xAAtDM1NTWy2WwX/fO7zZ5j1dTUpPXr1+uWW25RcnKyIiMjNWDAAK/lwrKyMjU0NCgpKcnYZ7fbFRcXp61bt0qStm3bJpvNZoQqSRo4cKBsNptXTVx
cnBGqJCk5OVl1dXUqKyszaoYMGWKEqrM1R48e1eeff37B48jLyzOWIG02m2JiYr7X7wIAANquNhusqqurVVtbq9/85jcaOXKkiouL9fOf/1z33XefNm/eLEmqqqpSYGCgQkNDvd4bFRWlqqoqoyYyMrLZ50dGRnrVREVFeY2HhoYqMDCwxZqzr8/WnM+0adPkdruNraKi4nJ+BgAA0I602dstNDU1SZJ+9rOf6cknn5Qk9e3bV1u3btVrr72mIUOGXPC9Ho/Ha2nufMt0ZtScXUW90DKgJFmtVq9ZLgAAcO1qszNWERER8vf3V+/evb329+rVy7gqMDo6WvX19XK5XF411dXVxmxSdHS0jh1rft7I8ePHvWrOnXVyuVxqaGhosaa6ulqSms1kAQCAjqnNBqvAwEDdcccdOnDggNf+gwcP6qabbpIkJSQkKCAgQCUlJcZ4ZWWlysvLNWjQIElSYmKi3G63du7cadTs2LFDbrfbq6a8vNzrztTFxcWyWq1KSEgwarZs2eJ1C4bi4mLZ7XZ1797d3IMHAADtkk+XAmtra/Xpp58arw8dOiSn06mwsDB169ZNTz31lO6//3795Cc/0d13362ioiK99dZb+uCDDyRJNptNEyZMUHZ2tsLDwxUWFqapU6cqPj5ew4cPl/TtDNfIkSOVnp6u119/XZL06KOPKiUlRbGxsZKkpKQk9e7dWw6HQ7Nnz9aJEyc0depUpaenG2f+p6Wl6aWXXtL48eP17LPP6pNPPlFubq5eeOGFFpcCAQBAx+HTGavdu3erX79+6tevnyQpKytL/fr10wsvvCBJ+vnPf67XXntNs2bNUnx8vP7whz9ozZo1uuuuu4zPmDdvnu69916NHTtWgwcPVufOnfXWW2/Jz8/PqFm1apXi4+OVlJSkpKQk9enTx+sWDn5+flq/fr06deqkwYMHa+zYsbr33ns1Z84co8Zms6mkpERHjhxR//79NXHiRGVlZSkrK6u1fyYAANBOtJn7WHUUl3ofDMBM3McKAL6fdn8fKwAAgPaGYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJvFpsNqyZYvGjBkju90ui8WidevWXbD2sccek8Vi0fz5873219XVacqUKYqIiFBwcLBSU1N15MgRrxqXyyWHwyGbzSabzSaHw6GTJ0961Rw+fFhjxoxRcHCwIiIilJGRofr6eq+aPXv2aMiQIQoKCtINN9ygGTNmyOPxfJ+fAAAAXEN8GqxOnz6t22+/XQsWLGixbt26ddqxY4fsdnuzsczMTK1du1aFhYUqLS1VbW2tUlJS1NjYaNSkpaXJ6XSqqKhIRUVFcjqdcjgcxnhjY6NGjx6t06dPq7S0VIWFhVqzZo2ys7ONmpqaGo0YMUJ2u127du1SQUGB5syZo/z8fBN+CQAAcC3w9+WXjxo1SqNGjWqx5ssvv9TkyZO1YcMGjR492mvM7XZryZIlWrFihYYPHy5JWrlypWJiYrRx40YlJydr//79Kioq0vbt2zVgwABJ0uLFi5WYmKgDBw4oNjZWxcXF2rdvnyoqKozwNnfuXI0fP14zZ85USEiIVq1apa+//lrLli2T1WpVXFycDh48qPz8fGVlZclisbTCLwQAANqTNn2OVVNTkxwOh5566inddtttzcbLysrU0NCgpKQkY5/dbldcXJy2bt0qSdq2bZtsNpsRqiRp4MCBstlsXjVxcXFeM2LJycmqq6tTWVmZUTNkyBBZrVavmqNHj+rzzz+/4DHU1dWppqbGawM
AANemNh2sXnnlFfn7+ysjI+O841VVVQoMDFRoaKjX/qioKFVVVRk1kZGRzd4bGRnpVRMVFeU1HhoaqsDAwBZrzr4+W3M+eXl5xrldNptNMTExLR0yAABox9pssCorK9Nvf/tbLVu27LKX2Twej9d7zvd+M2rOnrjeUn/Tpk2T2+02toqKiks/EAAA0K602WD1t7/9TdXV1erWrZv8/f3l7++vL774QtnZ2erevbskKTo6WvX19XK5XF7vra6uNmaToqOjdezYsWaff/z4ca+ac2edXC6XGhoaWqyprq6WpGYzWd9ltVoVEhLitQEAgGtTmw1WDodDH3/8sZxOp7HZ7XY99dRT2rBhgyQpISFBAQEBKikpMd5XWVmp8vJyDRo0SJKUmJgot9utnTt3GjU7duyQ2+32qikvL1dlZaVRU1xcLKvVqoSEBKNmy5YtXrdgKC4ult1uN4IeAADo2Hx6VWBtba0+/fRT4/WhQ4fkdDoVFhambt26KTw83Ks+ICBA0dHRio2NlSTZbDZNmDBB2dnZCg8PV1hYmKZOnar4+HjjKsFevXpp5MiRSk9P1+uvvy5JevTRR5WSkmJ8TlJSknr37i2Hw6HZs2frxIkTmjp1qtLT040ZprS0NL300ksaP368nn32WX3yySfKzc3VCy+8wBWBAABAko+D1e7du3X33Xcbr7OysiRJ48aN07Jlyy7pM+bNmyd/f3+NHTtWZ86c0bBhw7Rs2TL5+fkZNatWrVJGRoZx9WBqaqrXvbP8/Py0fv16TZw4UYMHD1ZQUJDS0tI0Z84co8Zms6mkpESTJk1S//79FRoaqqysLKNnAAAAi4dbh19VNTU1stlscrvdnG+Fqybs+iglzVzXYk3x9Ht14njz8xEBAJf+53ebPccKAACgvSFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJ/H3dAACcz219+qqysrLFmq5du2rvx86r0xAAXAKCFYA2qbKyUkkz17VYUzz93qvSCwBcKpYCAQAATEKwAgAAMAnBCgAAwCQEKwAAAJNw8joAU3E1H4COjGAFwFRczQegI2MpEAAAwCQEKwAAAJMQrAAAAExCsAIAADCJT4PVli1bNGbMGNntdlksFq1bt84Ya2ho0K9//WvFx8crODhYdrtdDz/8sI4ePer1GXV1dZoyZYoiIiIUHBys1NRUHTlyxKvG5XLJ4XDIZrPJZrPJ4XDo5MmTXjWHDx/WmDFjFBwcrIiICGVkZKi+vt6rZs+ePRoyZIiCgoJ0ww03aMaMGfJ4PKb+JgAAoP3yabA6ffq0br/9di1YsKDZ2L///W99+OGHev755/Xhhx/qL3/5iw4ePKjU1FSvuszMTK1du1aFhYUqLS1VbW2tUlJS1NjYaNSkpaXJ6XSqqKhIRUVFcjqdcjgcxnhjY6NGjx6t06dPq7S0VIWFhVqzZo2ys7ONmpqaGo0YMUJ2u127du1SQUGB5syZo/z8/Fb4ZQAAQHvk09stjBo1SqNGjTrvmM1mU0lJide+goIC3XnnnTp8+LC6desmt9utJUuWaMWKFRo+fLgkaeXKlYqJidHGjRuVnJys/fv3q6ioSNu3b9eAAQMkSYsXL1ZiYqIOHDig2NhYFRcXa9++faqoqJDdbpckzZ07V+PHj9fMmTMVEhKiVatW6euvv9ayZctktVoVFxengwcPKj8/X1lZWbJYLK34SwEAgPagXZ1j5Xa7ZbFYdN1110mSysrK1NDQoKSkJKPGbrcrLi5OW7dulSRt27ZNNpvNCFWSNHDgQNlsNq+auLg4I1RJUnJysurq6lRWVmbUDBkyRFar1avm6NGj+vzzzy/Yc11dnWpqarw2AABwbWo3werrr7/WM888o7S
0NIWEhEiSqqqqFBgYqNDQUK/aqKgoVVVVGTWRkZHNPi8yMtKrJioqyms8NDRUgYGBLdacfX225nzy8vKMc7tsNptiYmIu57ABAEA70i6CVUNDgx544AE1NTXp1VdfvWi9x+PxWpo73zKdGTVnT1xvaRlw2rRpcrvdxlZRUXHR/gEAQPvU5h9p09DQoLFjx+rQoUN67733jNkqSYqOjlZ9fb1cLpfXrFV1dbUGDRpk1Bw7dqzZ5x4/ftyYcYqOjtaOHTu8xl0ulxoaGrxqzp2Zqq6ulqRmM1nfZbVavZYPgbaq5lStwq6/8H/LEs/4A4CLadPB6myo+uSTT/T+++8rPDzcazwhIUEBAQEqKSnR2LFjJX37nLLy8nLNmjVLkpSYmCi3262dO3fqzjvvlCTt2LFDbrfbCF+JiYmaOXOmKisr1bVrV0lScXGxrFarEhISjJpnn31W9fX1CgwMNGrsdru6d+/e6r8F0No8TU084w8AviefLgXW1tbK6XTK6XRKkg4dOiSn06nDhw/rm2++0X/9139p9+7dWrVqlRobG1VVVaWqqirj/lI2m00TJkxQdna2Nm3apI8++kgPPfSQ4uPjjasEe/XqpZEjRyo9PV3bt2/X9u3blZ6erpSUFMXGxkqSkpKS1Lt3bzkcDn300UfatGmTpk6dqvT0dGOGLC0tTVarVePHj1d5ebnWrl2r3NxcrggEAAAGn85Y7d69W3fffbfxOisrS5I0btw45eTk6M0335Qk9e3b1+t977//voYOHSpJmjdvnvz9/TV27FidOXNGw4YN07Jly+Tn52fUr1q1ShkZGcbVg6mpqV73zvLz89P69es1ceJEDR48WEFBQUpLS9OcOXOMmrO3f5g0aZL69++v0NBQZWVlGT0DAAD4NFgNHTq0xTuXX8pdzTt16qSCggIVFBRcsCYsLEwrV65s8XO6deumt99+u8Wa+Ph4bdmy5aI9Abg6OC8MQFvTps+xAoCWcF4YgLamXdxuAQAAoD0gWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhNstALhkl3LfqJpTp65SNwDQ9hCsAFyyS7lv1J8n33N1mgGANoilQAAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMIlPg9WWLVs0ZswY2e12WSwWrVu3zmvc4/EoJydHdrtdQUFBGjp0qPbu3etVU1dXpylTpigiIkLBwcFKTU3VkSNHvGpcLpccDodsNptsNpscDodOnjzpVXP48GGNGTNGwcHBioiIUEZGhurr671q9uzZoyFDhigoKEg33HCDZsyYIY/HY9rvAQAA2jefBqvTp0/r9ttv14IFC847PmvWLOXn52vBggXatWuXoqOjNWLECJ06dcqoyczM1Nq1a1VYWKjS0lLV1tYqJSVFjY2NRk1aWpqcTqeKiopUVFQkp9Mph8NhjDc2Nmr06NE6ffq0SktLVVhYqDVr1ig7O9uoqamp0YgRI2S327Vr1y4VFBRozpw5ys/Pb4VfBgAAtEf+vvzyUaNGadSoUecd83g8mj9/vqZPn6777rtPkrR8+XJFRUVp9erVeuyxx+R2u7VkyRKtWLFCw4cPlyStXLlSMTEx2rhxo5KTk7V//34VFRVp+/btGjBggCRp8eLFSkxM1IEDBxQbG6vi4mLt27dPFRUVstvtkqS5c+dq/PjxmjlzpkJCQrRq1Sp9/fXXWrZsmaxWq+Li4nTw4EHl5+crKytLFovlKvxiAACgLWuz51gdOnRIVVVVSkpKMvZZrVYNGTJEW7dulSSVlZWpoaHBq8ZutysuLs6o2bZtm2w2mxGqJGngwIGy2WxeNXFxcUaokqT
k5GTV1dWprKzMqBkyZIisVqtXzdGjR/X5559f8Djq6upUU1PjtQEdXc2pWoVdH9XiVvOdmWkAaC98OmPVkqqqKklSVFSU1/6oqCh98cUXRk1gYKBCQ0Ob1Zx9f1VVlSIjI5t9fmRkpFfNud8TGhqqwMBAr5ru3bs3+56zYz169DjvceTl5emll1666PECHYmnqUlJM9e1WPPnyfdcnWYAwERtdsbqrHOX2Dwez0WX3c6tOV+9GTVnT1xvqZ9p06bJ7XYbW0VFRYu9AwCA9qvNBqvo6GhJ///M1VnV1dXGTFF0dLTq6+vlcrlarDl27Fizzz9+/LhXzbnf43K51NDQ0GJNdXW1pOazat9ltVoVEhLitQEAgGtTmw1WPXr0UHR0tEpKSox99fX12rx5swYNGiRJSkhIUEBAgFdNZWWlysvLjZrExES53W7t3LnTqNmxY4fcbrdXTXl5uSorK42a4uJiWa1WJSQkGDVbtmzxugVDcXGx7HZ7syVCAADQMfk0WNXW1srpdMrpdEr69oR1p9Opw4cPy2KxKDMzU7m5uVq7dq3Ky8s1fvx4de7cWWlpaZIkm82mCRMmKDs7W5s2bdJHH32khx56SPHx8cZVgr169dLIkSOVnp6u7du3a/v27UpPT1dKSopiY2MlSUlJSerdu7ccDoc++ugjbdq0SVOnTlV6eroxw5SWliar1arx48ervLxca9euVW5uLlcEAgAAg09PXt+9e7fuvvtu43VWVpYkady4cVq2bJmefvppnTlzRhMnTpTL5dKAAQNUXFysLl26GO+ZN2+e/P39NXbsWJ05c0bDhg3TsmXL5OfnZ9SsWrVKGRkZxtWDqampXvfO8vPz0/r16zVx4kQNHjxYQUFBSktL05w5c4wam82mkpISTZo0Sf3791doaKiysrKMngEAAHwarIYOHdrincstFotycnKUk5NzwZpOnTqpoKBABQUFF6wJCwvTypUrW+ylW7duevvtt1usiY+P15YtW1qsAQAAHVebPccKAACgvSFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJjkioLVzTffrK+++qrZ/pMnT+rmm2/+3k0BAAC0R1cUrD7//HM1NjY2219XV6cvv/zyezcFAADQHl3WDULffPNN4583bNggm81mvG5sbNSmTZt4bh4AAOiwLitY3XvvvZK+vSP6uHHjvMYCAgLUvXt3zZ0717TmAAAA2pPLClZNTU2SpB49emjXrl2KiIholaYAAADaoyt6VuChQ4fM7gMAAKDdu+KHMG/atEmbNm1SdXW1MZN11v/8z/9878YAAADamysKVi+99JJmzJih/v37q2vXrrJYLGb3BQAA0O5cUbB67bXXtGzZMjkcDrP7AQAAaLeu6D5W9fX1GjRokNm9AAAAtGtXFKweeeQRrV692uxeAMB0NadqFXZ9VIvbbX36+rpNANeIK1oK/Prrr7Vo0SJt3LhRffr0UUBAgNd4fn6+Kc0BwPflaWpS0sx1LdYUT7/3qvQC4Np3RcHq448/Vt++fSVJ5eXlXmOcyA4AADqqKwpW77//vtl9AAAAtHtXdI4VAAAAmruiGau77767xSW/995774obAgAAaK+uKFidPb/qrIaGBjmdTpWXlzd7ODMAAEBHcUXBat68eefdn5OTo9ra2u/VEAAAQHtl6jlWDz30EM8JBAAAHZapwWrbtm3q1KmTmR8JAADQblzRUuB9993n9drj8aiyslK7d+/W888/b0pjAAAA7c0VBSubzeb1+gc/+IFiY2M1Y8YMJSUlmdIYAABAe3NFwWrp0qVm9wEAANDuXVGwOqusrEz79++XxWJR79691a9fP7P6AgAAaHeuKFhVV1frgQce0AcffKDrrrtOHo9Hbrdbd999twoLC3X99deb3ScAAECbd0VXBU6ZMkU1NTXau3evTpw4IZfLpfLyctXU1CgjI8PsHgEAANqFKwpWRUVFWrhwoXr16mXs6927t37
/+9/r3XffNa25b775Rs8995x69OihoKAg3XzzzZoxY4aampqMGo/Ho5ycHNntdgUFBWno0KHau3ev1+fU1dVpypQpioiIUHBwsFJTU3XkyBGvGpfLJYfDIZvNJpvNJofDoZMnT3rVHD58WGPGjFFwcLAiIiKUkZGh+vp6044XAAC0b1cUrJqamhQQENBsf0BAgFfo+b5eeeUVvfbaa1qwYIH279+vWbNmafbs2SooKDBqZs2apfz8fC1YsEC7du1SdHS0RowYoVOnThk1mZmZWrt2rQoLC1VaWqra2lqlpKSosbHRqElLS5PT6VRRUZGKiorkdDrlcDiM8cbGRo0ePVqnT59WaWmpCgsLtWbNGmVnZ5t2vAAAoH27onOs7rnnHj3xxBN64403ZLfbJUlffvmlnnzySQ0bNsy05rZt26af/exnGj16tCSpe/fueuONN7R7925J385WzZ8/X9OnTzfurbV8+XJFRUVp9erVeuyxx+R2u7VkyRKtWLFCw4cPlyStXLlSMTEx2rhxo5KTk7V//34VFRVp+/btGjBggCRp8eLFSkxM1IEDBxQbG6vi4mLt27dPFRUVxjHPnTtX48eP18yZMxUSEmLacQMAgPbpimasFixYoFOnTql79+764Q9/qB/96Efq0aOHTp065TWb9H3ddddd2rRpkw4ePChJ+sc//qHS0lL99Kc/lSQdOnRIVVVVXvfOslqtGjJkiLZu3Srp2ysXGxoavGrsdrvi4uKMmm3btslmsxmhSpIGDhwom83mVRMXF2eEKklKTk5WXV2dysrKLngMdXV1qqmp8doAAMC16YpmrGJiYvThhx+qpKRE//znP+XxeNS7d29jRsgsv/71r+V2u3XrrbfKz89PjY2Nmjlzpn75y19KkqqqqiRJUVFRXu+LiorSF198YdQEBgYqNDS0Wc3Z91dVVSkyMrLZ90dGRnrVnPs9oaGhCgwMNGrOJy8vTy+99NLlHDYAAGinLmvG6r333lPv3r2NWZcRI0ZoypQpysjI0B133KHbbrtNf/vb30xr7k9/+pNWrlyp1atX68MPP9Ty5cs1Z84cLV++3KvOYrF4vfZ4PM32nevcmvPVX0nNuaZNmya3221sFRUVLfYFAADar8sKVvPnz1d6evp5zyey2Wx67LHHlJ+fb1pzTz31lJ555hk98MADio+Pl8Ph0JNPPqm8vDxJUnR0tCQ1mzGqrq42Zpeio6NVX18vl8vVYs2xY8eaff/x48e9as79HpfLpYaGhmYzWd9ltVoVEhLitQEAgGvTZQWrf/zjHxo5cuQFx5OSklo83+hy/fvf/9YPfuDdop+fn3HlYY8ePRQdHa2SkhJjvL6+Xps3b9agQYMkSQkJCQoICPCqqaysVHl5uVGTmJgot9utnTt3GjU7duyQ2+32qikvL1dlZaVRU1xcLKvVqoSEBNOOGQAAtF+XdY7VsWPHznubBePD/P11/Pjx793UWWPGjNHMmTPVrVs33Xbbbfroo4+Un5+vX/3qV5K+XZrLzMxUbm6uevbsqZ49eyo3N1edO3dWWlqapG9n0iZMmKDs7GyFh4crLCxMU6dOVXx8vHFOWK9evTRy5Eilp6fr9ddflyQ9+uijSklJUWxsrKRvQ2Pv3r3lcDg0e/ZsnThxQlOnTr3gDB4AAOh4LitY3XDDDdqzZ49+9KMfnXf8448/VteuXU1pTJIKCgr0/PPPa+LEiaqurpbdbtdjjz2mF154wah5+umndebMGU2cOFEul0sDBgxQcXGxunTpYtTMmzdP/v7+Gjt2rM6cOaNhw4Zp2bJl8vPzM2pWrVqljIwM4+rB1NRULViwwBj38/PT+vXrNXHiRA0ePFhBQUFKS0vTnDlzTDteAADQvl1WsPrpT3+qF154QaNGjVKnTp28xs6cOaMXX3xRKSkppjXXpUsXzZ8/X/Pnz79gjcViUU5OjnJyci5Y06lTJxUUFLR4K4iwsDCtXLmyxX66deumt99++2J
tAwCADuqygtVzzz2nv/zlL7rllls0efJkxcbGymKxaP/+/fr973+vxsZGTZ8+vbV6BQAAaNMuK1hFRUVp69at+u///m9NmzZNHo9H0rezRsnJyXr11VdbvEIOAADgWnbZNwi96aab9M4778jlcunTTz+Vx+NRz549m92AEwAAoKO5ojuvS9/edfyOO+4wsxcAAIB27YqeFQgAAIDmCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIRgBQAAYBKCFQAAgEkIVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYxN/XDQCAr9WcqlXY9VEXrevatav2fuxs/YYAtFsEKwAdnqepSUkz1120rnj6va3eC4D2rc0vBX755Zd66KGHFB4ers6dO6tv374qKyszxj0ej3JycmS32xUUFKShQ4dq7969Xp9RV1enKVOmKCIiQsHBwUpNTdWRI0e8alwulxwOh2w2m2w2mxwOh06ePOlVc/jwYY0ZM0bBwcGKiIhQRkaG6uvrW+3YgUtxW5++Crs+qsWt5tQpX7cJAB1Cm56xcrlcGjx4sO6++269++67ioyM1L/+9S9dd911Rs2sWbOUn5+vZcuW6ZZbbtHLL7+sESNG6MCBA+rSpYskKTMzU2+99ZYKCwsVHh6u7OxspaSkqKysTH5+fpKktLQ0HTlyREVFRZKkRx99VA6HQ2+99ZYkqbGxUaNHj9b111+v0tJSffXVVxo3bpw8Ho8KCgqu7g8DfEdlZeVFZ1v+PPmeq9MMAHRwbTpYvfLKK4qJidHSpUuNfd27dzf+2ePxaP78+Zo+fbruu+8+SdLy5csVFRWl1atX67HHHpPb7daSJUu0YsUKDR8+XJK0cuVKxcTEaOPGjUpOTtb+/ftVVFSk7du3a8CAAZKkxYsXKzExUQcOHFBsbKyKi4u1b98+VVRUyG63S5Lmzp2r8ePHa+bMmQoJCTnvMdTV1amurs54XVNTY+pvBAAA2o42vRT45ptvqn///vrFL36hyMhI9evXT4sXLzbGDx06pKqqKiUlJRn7rFarhgwZoq1bt0qSysrK1NDQ4FVjt9sVFxdn1Gzbtk02m80IVZI0cOBA2Ww2r5q4uDgjVElScnKy6urqvJYmz5WXl2csL9psNsXExHzPXwUAALRVbTpYffbZZ1q4cKF69uypDRs26PHHH1dGRob++Mc/SpKqqqokSVFR3lfzREVFGWNVVVUKDAxUaGhoizWRkZHNvj8yMtKr5tzvCQ0NVWBgoFFzPtOmTZPb7Ta2ioqKy/kJAABAO9KmlwKbmprUv39/5ebmSpL69eunvXv3auHChXr44YeNOovF4vU+j8fTbN+5zq05X/2V1JzLarXKarW22AsAALg2tOkZq65du6p3795e+3r16qXDhw9LkqKjoyWp2YxRdXW1MbsUHR2t+vp6uVyuFmuOHTvW7PuPHz/uVXPu97hcLjU0NDSbyQLQcV3KVZq39enr6zYBtJI2PWM1ePBgHThwwGvfwYMHddNNN0mSevTooejoaJWUlKhfv36SpPr6em3evFmvvPKKJCkhIUEBAQEqKSnR2LFjJX17FVV5eblmzZolSUpMTJTb7dbOnTt15513SpJ27Nght9utQYMGGTUzZ85UZWWlunbtKkkqLi6W1WpVQkJCK/8SANqLS7lKk/thAdeuNh2snnzySQ0aNEi5ubkaO3asdu7cqUWLFmnRokWSvl2ay8zMVG5urnr27KmePXsqNzdXnTt3VlpamiTJZrNpwoQJys7OVnh4uMLCwjR16lTFx8cbVwn26tVLI0eOVHp6ul5//XVJ395uISUlRbGxsZKkpKQk9e7dWw6HQ7Nnz9aJEyc0depUpaenX/CKQAA
A0LG06WB1xx13aO3atZo2bZpmzJihHj16aP78+XrwwQeNmqefflpnzpzRxIkT5XK5NGDAABUXFxv3sJKkefPmyd/fX2PHjtWZM2c0bNgwLVu2zLiHlSStWrVKGRkZxtWDqampWrBggTHu5+en9evXa+LEiRo8eLCCgoKUlpamOXPmXIVfAgAAtAdtOlhJUkpKilJSUi44brFYlJOTo5ycnAvWdOrUSQUFBS3eyDMsLEwrV65ssZdu3brp7bffvmjPAACgY2rTJ68DAAC0JwQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkbf7O60BHdlufvqqsrGyxpubUqavUDQDgYghWQBtWWVmppJnrWqz58+R7rk4zAICLYikQAADAJMxYAcAlqjlVq7Droy5Sw9Is0JERrADgEnmamliaBdAilgIBAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMEm7ClZ5eXmyWCzKzMw09nk8HuXk5MhutysoKEhDhw7V3r17vd5XV1enKVOmKCIiQsHBwUpNTdWRI0e8alwulxwOh2w2m2w2mxwOh06ePOlVc/jwYY0ZM0bBwcGKiIhQRkaG6uvrW+twAQBAO9NugtWuXbu0aNEi9enTx2v/rFmzlJ+frwULFmjXrl2Kjo7WiBEjdOrUKaMmMzNTa9euVWFhoUpLS1VbW6uUlBQ1NjYaNWlpaXI6nSoqKlJRUZGcTqccDocx3tjYqNGjR+v06dMqLS1VYWGh1qxZo+zs7NY/eAAA0C60i2BVW1urBx98UIsXL1ZoaKix3+PxaP78+Zo+fbruu+8+xcXFafny5fr3v/+t1atXS5LcbreWLFmiuXPnavjw4erXr59WrlypPXv2aOPGjZKk/fv3q6ioSH/4wx+UmJioxMRELV68WG+//bYOHDggSSouLta+ffu0cuVK9evXT8OHD9fcuXO1ePFi1dTUXLD3uro61dTUeG0AAODa1C6C1aRJkzR69GgNHz7ca/+hQ4dUVVWlpKQkY5/VatWQIUO0detWSVJZWZkaGhq8aux2u+Li4oyabdu2yWazacCAAUbNwIEDZbPZvGri4uJkt9uNmuTkZNXV1amsrOyCvefl5RnLizabTTExMd/jlwAAAG1Zmw9WhYWF+vDDD5WXl9dsrKqqSpIUFRXltT8qKsoYq6qqUmBgoNdM1/lqIiMjm31+ZGSkV8253xMaGqrAwECj5nymTZsmt9ttbBUVFRc7ZAAA0E75+7qBllRUVOiJJ55QcXGxOnXqdME6i8Xi9drj8TTbd65za85XfyU157JarbJarS32AgAArg1tesaqrKxM1dXVSkhIkL+/v/z9/bV582b97ne/k7+/vzGDdO6MUXV1tTEWHR2t+vp6uVyuFmuOHTvW7PuPHz/uVXPu97hcLjU0NDSbyQIAAB1Tmw5Ww4YN0549e+R0Oo2tf//+evDBB+V0OnXzzTcrOjpaJSUlxnvq6+u1efNmDRo0SJKUkJCggIAAr5rKykqVl5cbNYmJiXK73dq5c6dRs2PHDrndbq+a8vJyVVZWGjXFxcWyWq1KSEho1d8BAAC0D216KbBLly6Ki4vz2hccHKzw8HBjf2ZmpnJzc9WzZ0/17NlTubm56ty5s9LS0iRJNptNEyZMUHZ2tsLDwxUWFqapU6cqPj7eOBm+V69eGjlypNLT0/X6669Lkh599FGlpKQoNjZWkpSUlKTevXvL4XBo9uzZOnHihKZOnar09HSFhIRcrZ8EAAC0YW06WF2Kp59+WmfOnNHEiRPlcrk0YMAAFRcXq0uXLkbNvHnz5O/vr7Fjx+rMmTMaNmyYli1bJj8/P6Nm1apVysjIMK4eTE1N1YI
FC4xxPz8/rV+/XhMnTtTgwYMVFBSktLQ0zZkz5+odLAAAaNPaXbD64IMPvF5bLBbl5OQoJyfngu/p1KmTCgoKVFBQcMGasLAwrVy5ssXv7tatm95+++3LaRcAAHQgbfocKwAAgPaEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmKTd3XkduFbc1qev10O9z6fm1Kmr1A0AwAwEK8BHKisrlTRzXYs1f558z9VpBgBgCoIV0AqYjQKAjolgBbQCZqMAoGPi5HUAAACTEKwAAABMQrACAAAwCedYAcBVVnOqVmHXR7VY8+8zX6tzUKcWa7p27aq9HztN7AzA90WwAoCrzNPUdEkXNyTlF7VYUzz9XvOaAmAKlgIBAABMQrACAAAwCcEKAADAJAQrAAAAkxCsAAAATEKwAgAAMAnBCgAAwCQEKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkPCsQANqpS3mYMw9qBq4ughUAtFOX8jBnHtQMXF0sBQIAAJikTQervLw83XHHHerSpYsiIyN177336sCBA141Ho9HOTk5stvtCgoK0tChQ7V3716vmrq6Ok2ZMkUREREKDg5Wamqqjhw54lXjcrnkcDhks9lks9nkcDh08uRJr5rDhw9rzJgxCg4OVkREhDIyMlRfX98qxw4AANqfNh2sNm/erEmTJmn79u0qKSnRN998o6SkJJ0+fdqomTVrlvLz87VgwQLt2rVL0dHRGjFihE6dOmXUZGZmau3atSosLFRpaalqa2uVkpKixsZGoyYtLU1Op1NFRUUqKiqS0+mUw+EwxhsbGzV69GidPn1apaWlKiws1Jo1a5SdnX11fgwAANDmtelzrIqKirxeL126VJGRkSorK9NPfvITeTwezZ8/X9OnT9d9990nSVq+fLmioqK0evVqPfbYY3K73VqyZIlWrFih4cOHS5JWrlypmJgYbdy4UcnJydq/f7+Kioq0fft2DRgwQJK0ePFiJSYm6sCBA4qNjVVxcbH27duniooK2e12SdLcuXM1fvx4zZw5UyEhIVfxlwEAAG1Rm56xOpfb7ZYkhYWFSZIOHTqkqqoqJSUlGTVWq1VDhgzR1q1bJUllZWVqaGjwqrHb7YqLizNqtm3bJpvNZoQqSRo4cKBsNptXTVxcnBGqJCk5OVl1dXUqKyu7YM91dXWqqanx2gAAwLWp3QQrj8ejrKws3XXXXYqLi5MkVVVVSZKiorwvN46KijLGqqqqFBgYqNDQ0BZrIiMjm31nZGSkV8253xMaGqrAwECj5nzy8vKM87ZsNptiYmIu57ABAEA70m6C1eTJk/Xxxx/rjTfeaDZmsVi8Xns8nmb7znVuzfnqr6TmXNOmTZPb7Ta2ioqKFvsCAADtV5s+x+qsKVOm6M0339SWLVt04403Gvujo6MlfTub1LVrV2N/dXW1MbsUHR2t+vp6uVwur1mr6upqDRo0yKg5duxYs+89fvy41+fs2LHDa9zlcqmhoaHZTNZ3Wa1WWa3Wyz1k+MhtffqqsrKyxRpuuAgAuJA2Haw8Ho+mTJmitWvX6oMPPlCPHj28xnv06KHo6GiVlJSoX79+kqT6+npt3rxZr7zyiiQpISFBAQEBKikp0dixYyVJlZWVKi8v16xZsyRJiYmJcrvd2rlzp+68805J0o4dO+R2u43wlZiYqJkzZ6qystIIccXFxbJarUpISGj9HwNXRWVlJTdcBABcsTYdrCZNmqTVq1frr3/9q7p06WKcy2Sz2RQUFCSLxaLMzEzl5uaqZ8+e6tmzp3Jzc9W5c2elpaUZtRMmTFB2drbCw8MVFhamqVOnKj4+3rhKsFevXho5cqTS09P1+uuvS5IeffRRpaSkKDY2VpKUlJSk3r17y+FwaPbs2Tpx4oSmTp2q9PR0rggEAACS2niwWrhwoSRp6NChXvuXLl2q8ePHS5KefvppnTlzRhMnTpTL5dKAAQNUXFy
sLl26GPXz5s2Tv7+/xo4dqzNnzmjYsGFatmyZ/Pz8jJpVq1YpIyPDuHowNTVVCxYsMMb9/Py0fv16TZw4UYMHD1ZQUJDS0tI0Z86cVjp6AADQ3rTpYOXxeC5aY7FYlJOTo5ycnAvWdOrUSQUFBSooKLhgTVhYmFauXNnid3Xr1k1vv/32RXsCAAAdU7u5KhAAAKCta9MzVkBbVHOqVmHXX/hK0G9rTrU4DgC4NhGsgMvkaWq66JWDf558z9VpBgDQprAUCAAAYBKCFQAAgEkIVgAAACbhHCsAuIZdysUWPKYJMA/BCgCuYZdyscX/yxhO+AJMQrACgA7uUsIXz8gELg3nWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASQhWAAAAJiFYAQAAmIQ7rwMALopnDgKXhmAFALgoHnsDXBqCFQDAFMxqAQQrAIBJmNUCOHkdAADANAQrAAAAk7AUCAC4ajgPC9c6ghUA4KrhPCxc61gKBAAAMAkzVgCANoXlQrRnBCsAQJvCciHaM4IVAKDdYVYLbRXBCgDQ7jCrhbaKYAUAuCYxqwVfIFhdgVdffVWzZ89WZWWlbrvtNs2fP18//vGPfd0WAOA7LmVW6/9lDCd8wVQEq8v0pz/9SZmZmXr11Vc1ePBgvf766xo1apT27dunbt26+bo9AMBlIHzBbASry5Sfn68JEybokUcekSTNnz9fGzZs0MKFC5WXl+fT3m7r01eVlZUt1vA/PwBcHsIXLgfB6jLU19errKxMzzzzjNf+pKQkbd269bzvqaurU11dnfHa7XZLkmpqakzv7+iXX+qeF95osea9Gb9sle++VniamtRw5nTLNR4PNR2wpi32RE3bqWlqbNTdz61qsWbd06kKDb++xZp/f12nzp2sbaYmKjpaO7f9vcWajuLsn50ej6flQg8u2ZdffumR5Pn73//utX/mzJmeW2655bzvefHFFz2S2NjY2NjY2K6BraKiosWswIzVFbBYLF6vPR5Ps31nTZs2TVlZWcbrpqYmnThxQuHh4Rd8z5WoqalRTEyMKioqFBISYtrntkUd6ViljnW8HOu1qSMdq9SxjrcjHavH49GpU6dkt9tbrCNYXYaIiAj5+fmpqqrKa391dbWios6/tm61WmW1ek+1Xnfdda3VokJCQq75/7jP6kjHKnWs4+VYr00d6ViljnW8HeVYbTbbRWt4CPNlCAwMVEJCgkpKSrz2l5SUaNCgQT7qCgAAtBXMWF2mrKwsORwO9e/fX4mJiVq0aJEOHz6sxx9/3NetAQAAHyNYXab7779fX331lWbMmKHKykrFxcXpnXfe0U033eTTvqxWq1588cVmy47Xoo50rFLHOl6O9drUkY5V6ljH25GO9VJZPJ6LXTcIAACAS8E5VgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFbXiFdffVU9evRQp06dlJCQoL/97W++bsl0W7Zs0ZgxY2S322WxWLRu3Tpft9Rq8vLydMcdd6hLly6KjIzUvffeqwMHDvi6rVazcOFC9enTx7jJYGJiot59911ft9Xq8vLyZLFYlJmZ6etWWkVOTo4sFovXFh0d7eu2Ws2XX36phx56SOHh4ercubP69u2rsrIyX7fVKrp3797s363FYtGkSZN83ZrPEayuAX/605+UmZmp6dOn66OPPtKPf/xjjRo1SocPH/Z1a6Y6ffq0br/9di1YsMDXrbS6zZs3a9KkSdq+fbtKSkr0zTffKCkpSadPX/xBwe3RjTfeqN/85jfavXu3du/erXvuuUc/+9nPtHfvXl+31mp27dqlRYsWqU+fPr5upVXddtttqqysNLY9e/b4uqVW4XK5NHjwYAUEBOjdd9/Vvn37NHfu3FZ90oYv7dq1y+vf69kbZ//iF7/wcWdtgClPJ4ZP3XnnnZ7HH3/ca9+tt97qeeaZZ3zUUeuT5Fm7dq2v27hqqqurPZI8mzdv9nUrV01
oaKjnD3/4g6/baBWnTp3y9OzZ01NSUuIZMmSI54knnvB1S63ixRdf9Nx+++2+buOq+PWvf+256667fN2GzzzxxBOeH/7wh56mpiZft+JzzFi1c/X19SorK1NSUpLX/qSkJG3dutVHXcFsbrdbkhQWFubjTlpfY2OjCgsLdfr0aSUmJvq6nVYxadIkjR49WsOHD/d1K63uk08+kd1uV48ePfTAAw/os88+83VLreLNN99U//799Ytf/EKRkZHq16+fFi9e7Ou2ror6+nqtXLlSv/rVr2SxWHzdjs8RrNq5//3f/1VjY2Ozh0BHRUU1e1g02iePx6OsrCzdddddiouL83U7rWbPnj36j//4D1mtVj3++ONau3atevfu7eu2TFdYWKgPP/xQeXl5vm6l1Q0YMEB//OMftWHDBi1evFhVVVUaNGiQvvrqK1+3ZrrPPvtMCxcuVM+ePbVhwwY9/vjjysjI0B//+Edft9bq1q1bp5MnT2r8+PG+bqVN4JE214hz/5bg8Xj4m8M1YvLkyfr4449VWlrq61ZaVWxsrJxOp06ePKk1a9Zo3Lhx2rx58zUVrioqKvTEE0+ouLhYnTp18nU7rW7UqFHGP8fHxysxMVE//OEPtXz5cmVlZfmwM/M1NTWpf//+ys3NlST169dPe/fu1cKFC/Xwww/7uLvWtWTJEo0aNUp2u93XrbQJzFi1cxEREfLz82s2O1VdXd1sFgvtz5QpU/Tmm2/q/fff14033ujrdlpVYGCgfvSjH6l///7Ky8vT7bffrt/+9re+bstUZWVlqq6uVkJCgvz9/eXv76/Nmzfrd7/7nfz9/dXY2OjrFltVcHCw4uPj9cknn/i6FdN17dq12V8CevXqdc1dRHSuL774Qhs3btQjjzzi61baDIJVOxcYGKiEhATjioyzSkpKNGjQIB91he/L4/Fo8uTJ+stf/qL33ntPPXr08HVLV53H41FdXZ2v2zDVsGHDtGfPHjmdTmPr37+/HnzwQTmdTvn5+fm6xVZVV1en/fv3q2vXrr5uxXSDBw9udkuUgwcP6qabbvJRR1fH0qVLFRkZqdGjR/u6lTaDpcBrQFZWlhwOh/r376/ExEQtWrRIhw8f1uOPP+7r1kxVW1urTz/91Hh96NAhOZ1OhYWFqVu3bj7szHyTJk3S6tWr9de//lVdunQxZiRtNpuCgoJ83J35nn32WY0aNUoxMTE6deqUCgsL9cEHH6ioqMjXrZmqS5cuzc6TCw4OVnh4+DV5/tzUqVM1ZswYdevWTdXV1Xr55ZdVU1OjcePG+bo10z355JMaNGiQcnNzNXbsWO3cuVOLFi3SokWLfN1aq2lqatLSpUs1btw4+fsTJwy+vSgRZvn973/vuemmmzyBgYGe//zP/7wmL8t///33PZKabePGjfN1a6Y733FK8ixdutTXrbWKX/3qV8Z/v9dff71n2LBhnuLiYl+3dVVcy7dbuP/++z1du3b1BAQEeOx2u+e+++7z7N2719dttZq33nrLExcX57FarZ5bb73Vs2jRIl+31Ko2bNjgkeQ5cOCAr1tpUywej8fjm0gHAABwbeEcKwAAAJMQrAAAAExCsAIAADAJwQoAAMAkBCsAAACTEKwAAABMQrACAAAwCcEKAADAJAQrAPgePv/8c1ksFjmdTl+3AqAN4M7rAPA9NDY26vjx44qIiOB5aQAIVgBwperr6xUYGOjrNgC0ISwFAsD/GTp0qCZPnqzJkyfruuuuU3h4uJ577jmd/ftn9+7d9fLLL2v8+PGy2WxKT08/71Lg3r17NXr0aIWEhKhLly768Y9/rH/961/G+NKlS9WrVy916tRJt956q1599dWrfagAWgnz1gDwHcuXL9eECRO0Y8cO7d69W48++qhuuukmpaenS5Jmz56t559/Xs8999x53//ll1/qJz/5iYYOHar33ntPISEh+vvf/65vvvlGkrR48WK9+OKLWrBggfr166ePPvpI6enpCg4O1rh
x467acQJoHSwFAsD/GTp0qKqrq7V3715ZLBZJ0jPPPKM333xT+/btU/fu3dWvXz+tXbvWeM/nn3+uHj166KOPPlLfvn317LPPqrCwUAcOHFBAQECz7+jWrZteeeUV/fKXvzT2vfzyy3rnnXe0devW1j9IAK2KpUAA+I6BAwcaoUqSEhMT9cknn6ixsVGS1L9//xbf73Q69eMf//i8oer48eOqqKjQhAkT9B//8R/G9vLLL3stFQJov1gKBIDLEBwc3OJ4UFDQBceampokfbscOGDAAK8xPz+/798cAJ8jWAHAd2zfvr3Z6549e15y8OnTp4+WL1+uhoaGZrNWUVFRuuGGG/TZZ5/pwQcfNK1nAG0HS4EA8B0VFRXKysrSgQMH9MYbb6igoEBPPPHEJb9/8uTJqqmp0QMPPKDdu3frk08+0YoVK3TgwAFJUk5OjvLy8vTb3/5WBw8e1J49e7R06VLl5+e31iEBuIqYsQKA73j44Yd15swZ3XnnnfLz89OUKVP06KOPXvL7w8PD9d577+mpp57SkCFD5Ofnp759+2rw4MGSpEceeUSdO3fW7Nmz9fTTTys4OFjx8fHKzMxspSMCcDVxVSAA/J+hQ4eqb9++mj9/vq9bAdBOsRQIAABgEoIVAACASVgKBAAAMAkzVgAAACYhWAEAAJiEYAUAAGASghUAAIBJCFYAAAAmIVgBAACYhGAFAABgEoIVAACASf4/9HE9yvvKv10AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Log Transformation\n", + "import numpy as np\n", + "y_train_df = np.log1p(y_train_df)\n", + "sns.histplot(y_train_df,bins=50)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "152603c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2.397895\n", + "1 3.970292\n", + "2 2.397895\n", + "Name: price, dtype: float64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 로그 변환한 값을 데이터프레임에 넣기\n", + "mercari_df['price'] = np.log1p(mercari_df['price'])\n", + "mercari_df['price'].head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "160f4f7b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shipping 값 유형:\n", + " shipping\n", + "0 819435\n", + "1 663100\n", + "Name: count, dtype: int64\n", + "item_condition_id 값 유형:\n", + " item_condition_id\n", + "1 640549\n", + "3 432161\n", + "2 375479\n", + "4 31962\n", + "5 2384\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Shipping & item_condition 값 유형 체크\n", + "print('Shipping 값 유형:\\n',mercari_df['shipping'].value_counts())\n", + "print('item_condition_id 값 유형:\\n',mercari_df['item_condition_id'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a3eff2e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "82489" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# null 와 비슷한 \"No description yet\"값 확인 \n", + "boolean_cond = mercari_df['item_description']=='No description yet'\n", + "mercari_df[boolean_cond]['item_description'].count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f41b346", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
def split_cat(category_name):
    """Split a 'top/middle/sub' category path into exactly three levels.

    Parameters
    ----------
    category_name : str or missing value
        Slash-separated category path. Anything that is not a string
        (for example the float NaN pandas uses for missing text) is treated
        as a missing category.

    Returns
    -------
    list of str
        Always three items: [top, middle, sub]. Paths deeper than three
        levels keep only their first three parts; shorter paths and missing
        values are padded with 'Other_Null'.
    """
    placeholder = 'Other_Null'

    # Explicit type check instead of a bare `except:` so that unrelated
    # errors are no longer silently turned into a placeholder row.
    if not isinstance(category_name, str):
        return [placeholder] * 3

    # A fixed length matters for callers that transpose the per-row results
    # with zip(*...): zip stops at the shortest row, so a single short row
    # would otherwise truncate every output column.
    levels = category_name.split('/')[:3]
    levels.extend([placeholder] * (3 - len(levels)))
    return levels
"4ff13773", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "brand name 의 유형 건수 : 4810\n", + "brand name sample 5건 : \n", + " brand_name\n", + "Other_Null 632682\n", + "PINK 54088\n", + "Nike 54043\n", + "Victoria's Secret 48036\n", + "LuLaRoe 31024\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "print('brand name 의 유형 건수 :', mercari_df['brand_name'].nunique())\n", + "print('brand name sample 5건 : \\n', mercari_df['brand_name'].value_counts()[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4aec6c0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name 의 종류 갯수 : 1225273\n", + "name sample 7건 : \n", + " 0 MLB Cincinnati Reds T Shirt Size XL\n", + "1 Razer BlackWidow Chroma Keyboard\n", + "2 AVA-VIV Blouse\n", + "3 Leather Horse Statues\n", + "4 24K GOLD plated rose\n", + "5 Bundled items requested for Ruie\n", + "6 Acacia pacific tides santorini top\n", + "Name: name, dtype: object\n" + ] + } + ], + "source": [ + "print('name 의 종류 갯수 :', mercari_df['name'].nunique())\n", + "print('name sample 7건 : \\n', mercari_df['name'][:7])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2f12dac0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "item_description 평균 문자열 개수: 145.71139703278507\n" + ] + }, + { + "data": { + "text/plain": [ + "0 No description yet\n", + "1 This keyboard is in great condition and works like it came out of the box. All of the ports are tested and work perfectly. 
The lights are customizable via the Razer Synapse app on your PC.\n", + "Name: item_description, dtype: object" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.set_option('max_colwidth',200)\n", + "# item_description의 평균 문자열 개수\n", + "print('item_description 평균 문자열 개수:',mercari_df['item_description'].str.len().mean())\n", + "mercari_df['item_description'][:2]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "eb15eed6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name vectorization shape: (1482535, 105757)\n", + "item_description vectorization shape: (1482535, 50000)\n" + ] + } + ], + "source": [ + "# Feature Vectorization of name feature (CountVectorizer)\n", + "cnt_vec = CountVectorizer()\n", + "X_name = cnt_vec.fit_transform(mercari_df.name)\n", + "\n", + "# Feature Vectorization of item_description feature (TfidfVectorizer)\n", + "tfidf_descp = TfidfVectorizer(max_features = 50000, ngram_range= (1,3) , stop_words='english')\n", + "X_descp = tfidf_descp.fit_transform(mercari_df['item_description'])\n", + "\n", + "print('name vectorization shape:',X_name.shape)\n", + "print('item_description vectorization shape:',X_descp.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "4dd4ad1e", + "metadata": {}, + "outputs": [], + "source": [ + "# LabelBinarizer로 원-핫 인코딩 변환\n", + "from sklearn.preprocessing import LabelBinarizer\n", + "# brand_name\n", + "lb_brand_name = LabelBinarizer(sparse_output=True)\n", + "X_brand = lb_brand_name.fit_transform(mercari_df['brand_name'])\n", + "# item_condition\n", + "lb_item_cond_id = LabelBinarizer(sparse_output=True)\n", + "X_item_cond_id = lb_item_cond_id.fit_transform(mercari_df['item_condition_id'])\n", + "# shipping\n", + "lb_shipping= LabelBinarizer(sparse_output=True)\n", + "X_shipping = lb_shipping.fit_transform(mercari_df['shipping'])\n", + "\n", + "# cat_dae, 
cat_jung, cat_so \n", + "lb_cat_dae = LabelBinarizer(sparse_output=True)\n", + "X_cat_dae= lb_cat_dae.fit_transform(mercari_df['cat_dae'])\n", + "\n", + "lb_cat_jung = LabelBinarizer(sparse_output=True)\n", + "X_cat_jung = lb_cat_jung.fit_transform(mercari_df['cat_jung'])\n", + "\n", + "lb_cat_so = LabelBinarizer(sparse_output=True)\n", + "X_cat_so = lb_cat_so.fit_transform(mercari_df['cat_so'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "583474cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \n", + "X_brand_shape:(1482535, 4810), X_item_cond_id shape:(1482535, 5)\n", + "X_shipping shape:(1482535, 1), X_cat_dae shape:(1482535, 11)\n", + "X_cat_jung shape:(1482535, 114), X_cat_so shape:(1482535, 871)\n" + ] + } + ], + "source": [ + "print(type(X_brand), type(X_item_cond_id), type(X_shipping))\n", + "print('X_brand_shape:{0}, X_item_cond_id shape:{1}'.format(X_brand.shape, X_item_cond_id.shape))\n", + "print('X_shipping shape:{0}, X_cat_dae shape:{1}'.format(X_shipping.shape, X_cat_dae.shape))\n", + "print('X_cat_jung shape:{0}, X_cat_so shape:{1}'.format(X_cat_jung.shape, X_cat_so.shape))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "c5bc4669", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (1482535, 161569)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Constructing feature matrix using hstack\n", + "from scipy.sparse import hstack\n", + "import gc \n", + "sparse_matrix_list = (X_name,X_descp,X_brand,X_item_cond_id,X_shipping,X_cat_dae,X_cat_jung,X_cat_so)\n", + "X_features_sparse = hstack(sparse_matrix_list).tocsr()\n", + "print(type(X_features_sparse),X_features_sparse.shape)\n", + "del X_features_sparse\n", + "gc.collect()" + ] + }, + { + "cell_type": "markdown", + "id": "5821ea59", + 
import gc
from scipy.sparse import hstack


def rmsle(y, y_pred):
    """Root mean squared logarithmic error between actual and predicted values.

    Both inputs are converted to plain float arrays before subtracting, so
    the comparison is positional. Without this, two pandas Series would be
    aligned by index label, which can silently reorder the pairs or produce
    NaN when the labels differ.

    Parameters
    ----------
    y, y_pred : array-like
        Actual and predicted values on the original (non-log) scale.

    Returns
    -------
    float
        sqrt(mean((log1p(y) - log1p(y_pred)) ** 2)).
    """
    actual = np.asarray(y, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(np.power(np.log1p(actual) - np.log1p(predicted), 2)))


def evaluate_org_price(y_test, preds):
    """Score log1p-scale predictions with RMSLE on the original scale.

    Parameters
    ----------
    y_test, preds : array-like
        Actual and predicted values that are both on a log1p scale.

    Returns
    -------
    float
        RMSLE computed after undoing the log1p transform with expm1.
    """
    # Undo the log1p transform so rmsle() receives original-scale values.
    preds_expm1 = np.expm1(preds)
    y_test_expm1 = np.expm1(y_test)

    return rmsle(y_test_expm1, preds_expm1)


def model_train_predict(model, matrix_list, y=None, test_size=0.2, random_state=156):
    """Stack sparse feature blocks, fit `model` on a train split, predict the rest.

    Parameters
    ----------
    model : estimator
        Object exposing fit(X, y) and predict(X).
    matrix_list : sequence of sparse matrices
        Feature blocks with the same number of rows; stacked column-wise.
    y : array-like, optional
        Target values, one per row. Defaults to the notebook-level
        mercari_df['price'] column, which is what the original code always used.
    test_size : float, optional
        Fraction of rows held out for prediction (default 0.2).
    random_state : int, optional
        Seed forwarded to train_test_split (default 156).

    Returns
    -------
    tuple
        (preds, y_test): predictions for the held-out rows and their targets.
    """
    X = hstack(matrix_list).tocsr()
    target = mercari_df['price'] if y is None else y

    X_train, X_test, y_train, y_test = train_test_split(
        X, target, test_size=test_size, random_state=random_state)

    # Train
    model.fit(X_train, y_train)
    # Predict
    preds = model.predict(X_test)

    # Drop the large intermediates explicitly; the kernel would otherwise
    # keep them alive until the names are rebound.
    del X, X_train, X_test, y_train
    gc.collect()

    return preds, y_test
X_shipping, X_cat_dae, X_cat_jung, X_cat_so)\n", + "linear_preds , y_test = model_train_predict(model=linear_model , matrix_list=sparse_matrix_list)\n", + "print('Item Description을 포함한 rmsle 값:', evaluate_org_price(y_test ,linear_preds))" + ] + }, + { + "cell_type": "markdown", + "id": "bd4ee00e", + "metadata": {}, + "source": [ + "# LightGBM + Ensemble" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "81b9160b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 222.862067 seconds.\n", + "You can set `force_row_wise=true` to remove the overhead.\n", + "And if memory is not enough, you can set `force_col_wise=true`.\n", + "[LightGBM] [Info] Total Bins 1068421\n", + "[LightGBM] [Info] Number of data points in the train set: 1186028, number of used features: 65338\n", + "[LightGBM] [Info] Start training from score 2.979514\n", + "LightGBM rmsle 값: 0.4569987654210569\n" + ] + } + ], + "source": [ + "from lightgbm import LGBMRegressor\n", + "\n", + "sparse_matrix_list = (X_descp, X_name, X_brand, X_item_cond_id,\n", + " X_shipping, X_cat_dae, X_cat_jung, X_cat_so)\n", + "\n", + "lgbm_model = LGBMRegressor(n_estimators=200, learning_rate=0.5, num_leaves=125, random_state=156)\n", + "lgbm_preds , y_test = model_train_predict(model = lgbm_model , matrix_list=sparse_matrix_list)\n", + "print('LightGBM rmsle 값:', evaluate_org_price(y_test , lgbm_preds))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "3b67ecc5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LightGBM과 Ridge를 ensemble한 최종 rmsle 값: 0.4468973140634917\n" + ] + } + ], + "source": [ + "preds = lgbm_preds * 0.45 + linear_preds * 0.55\n", + "print('LightGBM과 Ridge를 ensemble한 최종 rmsle 값:', evaluate_org_price(y_test , preds))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
"base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}