# %% Toy rating matrix and random factor initialisation
import numpy as np

# 4 users x 5 items; np.nan marks a rating that was never given.
R = np.array([[4, np.nan, np.nan, 2, np.nan],
              [np.nan, 5, np.nan, 3, 1],
              [np.nan, np.nan, 3, 4, 4],
              [5, 2, 1, 2, np.nan]])
num_users, num_items = R.shape
K = 3  # number of latent factors (columns of P and Q)

# Seed the global NumPy RNG so P and Q start from the same values on every run.
np.random.seed(1)
P = np.random.normal(scale=1./K, size=(num_users, K))
Q = np.random.normal(scale=1./K, size=(num_items, K))


# %% RMSE helper
def get_rmse(R, P, Q, non_zeros):
    """Return the RMSE between R and P @ Q.T, measured on observed cells only.

    Args:
        R: 2-D rating matrix; only the cells listed in ``non_zeros`` are read.
        P: factor matrix whose rows line up with the rows of R.
        Q: factor matrix whose rows line up with the columns of R.
        non_zeros: non-empty, re-iterable sequence of ``(row, col, rating)``
            tuples. Only the first two fields are used here.

    Returns:
        The root-mean-squared error as a NumPy float.

    Raises:
        ValueError: if ``non_zeros`` is empty (there is nothing to average).
    """
    if len(non_zeros) == 0:
        raise ValueError("non_zeros must contain at least one (row, col, rating) entry")

    rows = [entry[0] for entry in non_zeros]
    cols = [entry[1] for entry in non_zeros]
    # Fancy-index both matrices with the same coordinates so that missing
    # (NaN) cells of R never enter the error.
    residuals = R[rows, cols] - np.dot(P, Q.T)[rows, cols]
    return np.sqrt(np.mean(residuals ** 2))


# %% SGD training
# Observed entries as (user index, item index, rating). `NaN > 0` is False,
# so missing cells are left out.
non_zeros = [(i, j, R[i, j]) for i in range(num_users) for j in range(num_items) if R[i, j] > 0]

steps = 1000
learning_rate = 0.01
r_lambda = 0.01  # weight of the L2 penalty term in both updates

for step in range(steps):
    for i, j, r in non_zeros:
        eij = r - np.dot(P[i, :], Q[j, :].T)
        # P[i] is updated first; the refreshed row is then used for Q[j].
        P[i, :] = P[i, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
        Q[j, :] = Q[j, :] + learning_rate * (eij * P[i, :] - r_lambda * Q[j, :])

    # Report once per 50 full passes over the ratings. This must stay outside
    # the inner loop, otherwise one line is printed per rating.
    if (step % 50) == 0:
        rmse = get_rmse(R, P, Q, non_zeros)
        print("### iteration step : ", step, " rmse : ", rmse)
0.4984148489378701\n", + "### iteration step : 50 rmse : 0.49792599580240876\n", + "### iteration step : 50 rmse : 0.4900605568692785\n", + "### iteration step : 50 rmse : 0.4890370238665435\n", + "### iteration step : 50 rmse : 0.48869176023997846\n", + "### iteration step : 50 rmse : 0.4876723101369648\n", + "### iteration step : 100 rmse : 0.15911521988578564\n", + "### iteration step : 100 rmse : 0.1588091617801093\n", + "### iteration step : 100 rmse : 0.1587409221708901\n", + "### iteration step : 100 rmse : 0.1582856952842508\n", + "### iteration step : 100 rmse : 0.1583080948216876\n", + "### iteration step : 100 rmse : 0.15828832993767403\n", + "### iteration step : 100 rmse : 0.15787486893092847\n", + "### iteration step : 100 rmse : 0.15792073606567072\n", + "### iteration step : 100 rmse : 0.15725245215457084\n", + "### iteration step : 100 rmse : 0.15710664164665206\n", + "### iteration step : 100 rmse : 0.15690252144190003\n", + "### iteration step : 100 rmse : 0.1564340384819247\n", + "### iteration step : 150 rmse : 0.07546004875264435\n", + "### iteration step : 150 rmse : 0.07544589133447106\n", + "### iteration step : 150 rmse : 0.07543234329653023\n", + "### iteration step : 150 rmse : 0.07514800672233914\n", + "### iteration step : 150 rmse : 0.07518867696418177\n", + "### iteration step : 150 rmse : 0.0752288950993841\n", + "### iteration step : 150 rmse : 0.07489318864469259\n", + "### iteration step : 150 rmse : 0.07493400425933257\n", + "### iteration step : 150 rmse : 0.07462695506527872\n", + "### iteration step : 150 rmse : 0.07464332131959663\n", + "### iteration step : 150 rmse : 0.0746444164156341\n", + "### iteration step : 150 rmse : 0.07455141311978046\n", + "### iteration step : 200 rmse : 0.04361016579439073\n", + "### iteration step : 200 rmse : 0.04370913068953006\n", + "### iteration step : 200 rmse : 0.04369072102767977\n", + "### iteration step : 200 rmse : 0.043475549832271414\n", + "### iteration step : 200 rmse : 
0.0435313092537358\n", + "### iteration step : 200 rmse : 0.04359240037575283\n", + "### iteration step : 200 rmse : 0.04329647906053838\n", + "### iteration step : 200 rmse : 0.04332057192123618\n", + "### iteration step : 200 rmse : 0.04310448294502512\n", + "### iteration step : 200 rmse : 0.04313550286658552\n", + "### iteration step : 200 rmse : 0.04313786864806258\n", + "### iteration step : 200 rmse : 0.04325226798579314\n", + "### iteration step : 250 rmse : 0.029395183185609734\n", + "### iteration step : 250 rmse : 0.02954402948437167\n", + "### iteration step : 250 rmse : 0.02950187436758184\n", + "### iteration step : 250 rmse : 0.029329609713572593\n", + "### iteration step : 250 rmse : 0.02940211807327667\n", + "### iteration step : 250 rmse : 0.02946720568417511\n", + "### iteration step : 250 rmse : 0.029189294191791375\n", + "### iteration step : 250 rmse : 0.029198757426747605\n", + "### iteration step : 250 rmse : 0.028995742260002243\n", + "### iteration step : 250 rmse : 0.02904415445054541\n", + "### iteration step : 250 rmse : 0.029049587101179365\n", + "### iteration step : 250 rmse : 0.029248328780878973\n", + "### iteration step : 300 rmse : 0.022678715233749362\n", + "### iteration step : 300 rmse : 0.022844873864300484\n", + "### iteration step : 300 rmse : 0.022773566650325074\n", + "### iteration step : 300 rmse : 0.02263234507322516\n", + "### iteration step : 300 rmse : 0.02272006255153119\n", + "### iteration step : 300 rmse : 0.022778917442558434\n", + "### iteration step : 300 rmse : 0.022516243062381223\n", + "### iteration step : 300 rmse : 0.022515508246519694\n", + "### iteration step : 300 rmse : 0.02229491665298542\n", + "### iteration step : 300 rmse : 0.022367287171783136\n", + "### iteration step : 300 rmse : 0.022392303480653113\n", + "### iteration step : 300 rmse : 0.022621116143829466\n", + "### iteration step : 350 rmse : 0.019516973680183715\n", + "### iteration step : 350 rmse : 0.019681605297160464\n", + "### 
iteration step : 350 rmse : 0.019585635379668415\n", + "### iteration step : 350 rmse : 0.01946716545524988\n", + "### iteration step : 350 rmse : 0.01956568678979253\n", + "### iteration step : 350 rmse : 0.019614020075870497\n", + "### iteration step : 350 rmse : 0.019368393329296258\n", + "### iteration step : 350 rmse : 0.019361014872334943\n", + "### iteration step : 350 rmse : 0.019116038405167533\n", + "### iteration step : 350 rmse : 0.01920981547997513\n", + "### iteration step : 350 rmse : 0.019255623979392192\n", + "### iteration step : 350 rmse : 0.019493636196525135\n", + "### iteration step : 400 rmse : 0.01803666559195465\n", + "### iteration step : 400 rmse : 0.01819133106334419\n", + "### iteration step : 400 rmse : 0.018078504374883574\n", + "### iteration step : 400 rmse : 0.01797554592952707\n", + "### iteration step : 400 rmse : 0.018080509676855847\n", + "### iteration step : 400 rmse : 0.018118882879536648\n", + "### iteration step : 400 rmse : 0.017889686482489363\n", + "### iteration step : 400 rmse : 0.017878066671070433\n", + "### iteration step : 400 rmse : 0.01761224433968553\n", + "### iteration step : 400 rmse : 0.01772096734904666\n", + "### iteration step : 400 rmse : 0.01778179645659777\n", + "### iteration step : 400 rmse : 0.018022719092132704\n", + "### iteration step : 450 rmse : 0.017334045429542092\n", + "### iteration step : 450 rmse : 0.01747683493759156\n", + "### iteration step : 450 rmse : 0.01735361907510825\n", + "### iteration step : 450 rmse : 0.017260553985290646\n", + "### iteration step : 450 rmse : 0.01736909385010645\n", + "### iteration step : 450 rmse : 0.017399933857257726\n", + "### iteration step : 450 rmse : 0.01718431757863743\n", + "### iteration step : 450 rmse : 0.01716990649625117\n", + "### iteration step : 450 rmse : 0.01688861579579296\n", + "### iteration step : 450 rmse : 0.017006638154083088\n", + "### iteration step : 450 rmse : 0.01707679250866153\n", + "### iteration step : 450 rmse : 
0.01731968595344266\n", + "### iteration step : 500 rmse : 0.016991609248052833\n", + "### iteration step : 500 rmse : 0.01712340891578616\n", + "### iteration step : 500 rmse : 0.01699398405641037\n", + "### iteration step : 500 rmse : 0.01690707049203008\n", + "### iteration step : 500 rmse : 0.01701760577221745\n", + "### iteration step : 500 rmse : 0.017043277556700362\n", + "### iteration step : 500 rmse : 0.01683803145900356\n", + "### iteration step : 500 rmse : 0.016821674312725313\n", + "### iteration step : 500 rmse : 0.016529281264429145\n", + "### iteration step : 500 rmse : 0.0166528887951985\n", + "### iteration step : 500 rmse : 0.016728541275490984\n", + "### iteration step : 500 rmse : 0.016973657887570753\n", + "### iteration step : 550 rmse : 0.016818969716266233\n", + "### iteration step : 550 rmse : 0.016941445597444732\n", + "### iteration step : 550 rmse : 0.0168082592988841\n", + "### iteration step : 550 rmse : 0.016725234339747562\n", + "### iteration step : 550 rmse : 0.01683693849143515\n", + "### iteration step : 550 rmse : 0.016859187050621206\n", + "### iteration step : 550 rmse : 0.016661644526141564\n", + "### iteration step : 550 rmse : 0.01664385102006508\n", + "### iteration step : 550 rmse : 0.016343446075494233\n", + "### iteration step : 550 rmse : 0.01647044082182643\n", + "### iteration step : 550 rmse : 0.01654932331426952\n", + "### iteration step : 550 rmse : 0.016796804595895633\n", + "### iteration step : 600 rmse : 0.016727439717439115\n", + "### iteration step : 600 rmse : 0.016842259158977232\n", + "### iteration step : 600 rmse : 0.016706687924467476\n", + "### iteration step : 600 rmse : 0.016626255644609397\n", + "### iteration step : 600 rmse : 0.016738696939262717\n", + "### iteration step : 600 rmse : 0.016758682415985614\n", + "### iteration step : 600 rmse : 0.0165668572000528\n", + "### iteration step : 600 rmse : 0.016547954461110684\n", + "### iteration step : 600 rmse : 0.016241668760761063\n", + "### 
iteration step : 600 rmse : 0.016370800056137867\n", + "### iteration step : 600 rmse : 0.016451627209257007\n", + "### iteration step : 600 rmse : 0.01670132290188466\n", + "### iteration step : 650 rmse : 0.016674291334806343\n", + "### iteration step : 650 rmse : 0.016782895588885082\n", + "### iteration step : 650 rmse : 0.016645698091647773\n", + "### iteration step : 650 rmse : 0.01656714079916223\n", + "### iteration step : 650 rmse : 0.016680091021598568\n", + "### iteration step : 650 rmse : 0.016698554271430792\n", + "### iteration step : 650 rmse : 0.016511017732427972\n", + "### iteration step : 650 rmse : 0.016491228766905293\n", + "### iteration step : 650 rmse : 0.01618054419796173\n", + "### iteration step : 650 rmse : 0.01631111150707529\n", + "### iteration step : 650 rmse : 0.01639316772050061\n", + "### iteration step : 650 rmse : 0.01664473691247669\n", + "### iteration step : 700 rmse : 0.0166383624426085\n", + "### iteration step : 700 rmse : 0.016741936743323586\n", + "### iteration step : 700 rmse : 0.016603524189001625\n", + "### iteration step : 700 rmse : 0.016526454393300468\n", + "### iteration step : 700 rmse : 0.016639792083379498\n", + "### iteration step : 700 rmse : 0.016657201345297346\n", + "### iteration step : 700 rmse : 0.016472928381641428\n", + "### iteration step : 700 rmse : 0.01645241257047358\n", + "### iteration step : 700 rmse : 0.016138379086448083\n", + "### iteration step : 700 rmse : 0.016269993747904915\n", + "### iteration step : 700 rmse : 0.01635288508504558\n", + "### iteration step : 700 rmse : 0.016605910068210026\n", + "### iteration step : 750 rmse : 0.01660906046895522\n", + "### iteration step : 750 rmse : 0.016708562969098305\n", + "### iteration step : 750 rmse : 0.016569153528341783\n", + "### iteration step : 750 rmse : 0.016493367054249922\n", + "### iteration step : 750 rmse : 0.016607027966870924\n", + "### iteration step : 750 rmse : 0.01662368102752549\n", + "### iteration step : 750 rmse : 
0.016441927271724666\n", + "### iteration step : 750 rmse : 0.0164208024653437\n", + "### iteration step : 750 rmse : 0.016104179990850755\n", + "### iteration step : 750 rmse : 0.016236628551952913\n", + "### iteration step : 750 rmse : 0.016320141009292095\n", + "### iteration step : 750 rmse : 0.016574200475705\n", + "### iteration step : 800 rmse : 0.016581161561119846\n", + "### iteration step : 800 rmse : 0.016677363428436936\n", + "### iteration step : 800 rmse : 0.016537069269613652\n", + "### iteration step : 800 rmse : 0.0164624613777787\n", + "### iteration step : 800 rmse : 0.016576412350487568\n", + "### iteration step : 800 rmse : 0.01659250180024954\n", + "### iteration step : 800 rmse : 0.01641271740942833\n", + "### iteration step : 800 rmse : 0.016391072859801518\n", + "### iteration step : 800 rmse : 0.01607242307736876\n", + "### iteration step : 800 rmse : 0.016205589842521878\n", + "### iteration step : 800 rmse : 0.016289609430091494\n", + "### iteration step : 800 rmse : 0.01654431582921597\n", + "### iteration step : 850 rmse : 0.01655222898431553\n", + "### iteration step : 850 rmse : 0.01664575121547569\n", + "### iteration step : 850 rmse : 0.016504627328190514\n", + "### iteration step : 850 rmse : 0.016431145801748863\n", + "### iteration step : 850 rmse : 0.016545370571042432\n", + "### iteration step : 850 rmse : 0.016561024020105147\n", + "### iteration step : 850 rmse : 0.016382795627019747\n", + "### iteration step : 850 rmse : 0.016360700076085824\n", + "### iteration step : 850 rmse : 0.016040446344395578\n", + "### iteration step : 850 rmse : 0.016174269580681345\n", + "### iteration step : 850 rmse : 0.016258737354641353\n", + "### iteration step : 850 rmse : 0.01651375177473524\n", + "### iteration step : 900 rmse : 0.016521280433777957\n", + "### iteration step : 900 rmse : 0.016612624200841405\n", + "### iteration step : 900 rmse : 0.016470695682261876\n", + "### iteration step : 900 rmse : 0.016398314989165292\n", + "### 
iteration step : 900 rmse : 0.016512806333073466\n", + "### iteration step : 900 rmse : 0.01652811087350182\n", + "### iteration step : 900 rmse : 0.016351122754892394\n", + "### iteration step : 900 rmse : 0.016328629783842166\n", + "### iteration step : 900 rmse : 0.016007096878603234\n", + "### iteration step : 900 rmse : 0.016141544071514122\n", + "### iteration step : 900 rmse : 0.016226430843994055\n", + "### iteration step : 900 rmse : 0.01648146573819501\n", + "### iteration step : 950 rmse : 0.016488081335748316\n", + "### iteration step : 950 rmse : 0.016577652134974717\n", + "### iteration step : 950 rmse : 0.01643492933498176\n", + "### iteration step : 950 rmse : 0.0163636366204062\n", + "### iteration step : 950 rmse : 0.01647839195954869\n", + "### iteration step : 950 rmse : 0.01649340903060659\n", + "### iteration step : 950 rmse : 0.016317416842511007\n", + "### iteration step : 950 rmse : 0.016294568571753248\n", + "### iteration step : 950 rmse : 0.015972009545965248\n", + "### iteration step : 950 rmse : 0.0161070634587959\n", + "### iteration step : 950 rmse : 0.016192355609214733\n", + "### iteration step : 950 rmse : 0.016447171683479155\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "pred_matrix = np.dot(P, Q.T)\n", + "print('예측 행렬:\\n', np.round(pred_matrix, 3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "97UH2sMoHCsq", + "outputId": "69c1a8ed-3f7e-47cd-d213-6243795e4eab" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "예측 행렬:\n", + " [[3.991 0.897 1.306 2.002 1.663]\n", + " [6.696 4.978 0.979 2.981 1.003]\n", + " [6.677 0.391 2.987 3.977 3.986]\n", + " [4.968 2.005 1.006 2.017 1.14 ]]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install scikit-surprise" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MP7xQEe_Hax8", + "outputId": 
"8bbb1249-b6b6-4acf-9d39-5c48060c4679" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: scikit-surprise in /usr/local/lib/python3.12/dist-packages (1.1.4)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-surprise) (1.5.3)\n", + "Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.12/dist-packages (from scikit-surprise) (1.26.4)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-surprise) (1.16.3)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from surprise import SVD\n", + "from surprise import Dataset\n", + "from surprise import accuracy\n", + "from surprise.model_selection import train_test_split" + ], + "metadata": { + "id": "Z-f5ilaCIfuW" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "data = Dataset.load_builtin('ml-100k')\n", + "trainset, testset = train_test_split(data, test_size=.25, random_state=0)" + ], + "metadata": { + "id": "Yljtqwx_IyWv" + }, + "execution_count": 39, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "algo = SVD(random_state=0)\n", + "algo.fit(trainset)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Y2DmVzd-KDjH", + "outputId": "19b1f1f1-0a13-4f38-df8f-dde4cab69f81" + }, + "execution_count": 30, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "predictions = algo.test(testset)\n", + "print('prediction type: ', type(predictions), 'size:', len(predictions))\n", + "print('prediction 결과의 최초 5개 추출')\n", + "predictions[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WAyTx594KJy8", + "outputId": 
"ad247827-4dda-4860-9237-7507b9aa0511" + }, + "execution_count": 31, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "prediction type: size: 25000\n", + "prediction 결과의 최초 5개 추출\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[Prediction(uid='120', iid='282', r_ui=4.0, est=3.5114147666251547, details={'was_impossible': False}),\n", + " Prediction(uid='882', iid='291', r_ui=4.0, est=3.573872419581491, details={'was_impossible': False}),\n", + " Prediction(uid='535', iid='507', r_ui=5.0, est=4.033583485472447, details={'was_impossible': False}),\n", + " Prediction(uid='697', iid='244', r_ui=5.0, est=3.8463639495936905, details={'was_impossible': False}),\n", + " Prediction(uid='751', iid='385', r_ui=4.0, est=3.1807542478219157, details={'was_impossible': False})]" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [ + "[(pred.uid, pred.iid, pred.est) for pred in predictions[:3]]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kvno5v3vKUNZ", + "outputId": "967667b1-48d9-43e3-8c02-439fde758e8c" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[('120', '282', 3.5114147666251547),\n", + " ('882', '291', 3.573872419581491),\n", + " ('535', '507', 4.033583485472447)]" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ] + }, + { + "cell_type": "code", + "source": [ + "uid =str(196)\n", + "iid=str(302)\n", + "pred=algo.predict(uid, iid)\n", + "print(pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A9lNO37cKbP-", + "outputId": "7039d2ab-9b23-4e6f-ebff-f7ab95e281a3" + }, + "execution_count": 33, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "user: 196 item: 302 r_ui = None est = 4.49 {'was_impossible': False}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": 
# %% Test-set RMSE of the ml-100k model
accuracy.rmse(predictions)

# %% Export the cached ml-100k ratings as a header-less CSV
import os
import pandas as pd

# Location where Surprise stored the built-in dataset in the recorded run.
path = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/u.data')
ratings = pd.read_csv(path, sep='\t', names=['user', 'item', 'rating', 'timestamp'])

ratings.to_csv('ratings_noh.csv', index=False, header=False)

# %% Confirm the file was written
os.listdir('.')

# %% Download and unpack ml-latest-small
# Both steps are guarded so re-running the cell neither downloads a second
# copy nor stops to ask about overwriting already-extracted files.
import urllib.request
import zipfile

ML_SMALL_URL = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
ML_SMALL_ZIP = 'ml-latest-small.zip'

if not os.path.exists(ML_SMALL_ZIP):
    urllib.request.urlretrieve(ML_SMALL_URL, ML_SMALL_ZIP)

# The archive unpacks into a top-level `ml-latest-small/` directory.
if not os.path.isdir('ml-latest-small'):
    with zipfile.ZipFile(ML_SMALL_ZIP) as archive:
        archive.extractall('.')
# %% Write a header-less copy of the ratings for Surprise's file loader
import pandas as pd

ratings = pd.read_csv('./ml-latest-small/ratings.csv')
ratings.to_csv('./ml-latest-small/ratings_noh.csv', header=False, index=False)

# %% Load the header-less file through a Reader
from surprise import Reader

# Each line is "user,item,rating,timestamp"; ratings run from 0.5 to 5.
reader = Reader(
    line_format='user item rating timestamp',
    sep=',',
    rating_scale=(0.5, 5),
)
data = Dataset.load_from_file('./ml-latest-small/ratings_noh.csv', reader=reader)

# %% 75/25 split, 50-factor SVD, test-set RMSE
trainset, testset = train_test_split(data, random_state=0, test_size=.25)

algo = SVD(random_state=0, n_factors=50)
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)
# %% Same experiment, loading the ratings straight from a DataFrame
import pandas as pd
from surprise import Reader, Dataset

ratings = pd.read_csv('./ml-latest-small/ratings.csv')
reader = Reader(rating_scale=(0.5, 5.0))

# load_from_df expects the columns in user, item, rating order.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=.25, random_state=0)

algo = SVD(n_factors=50, random_state=0)
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.rmse(predictions)

# %% 5-fold cross-validation on the same `data`
from surprise.model_selection import KFold, cross_validate

algo = SVD(random_state=0)
# The fold splitter is seeded like everything else in this notebook, so the
# per-fold scores are the same on every run.
cross_validate(algo, data, measures=['RMSE', 'MAE'],
               cv=KFold(n_splits=5, random_state=0), verbose=True)
# %% Hyper-parameter search
from surprise.model_selection import GridSearchCV, KFold

# random_state is part of the grid so every candidate SVD is seeded; together
# with the seeded splitter this makes the search repeatable.
param_grid = {'n_epochs': [20, 40, 60], 'n_factors': [50, 100, 200], 'random_state': [0]}

gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'],
                  cv=KFold(n_splits=3, random_state=0))
gs.fit(data)

print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

# %% Build a trainset from every rating
# SVD.fit() needs a Trainset. Passing the Dataset object itself fails with
# "AttributeError: 'DatasetAutoFolds' object has no attribute 'n_users'",
# so the full trainset is built explicitly from the header-less ratings file.
from surprise.dataset import DatasetAutoFolds

reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(0.5, 5))
data_folds = DatasetAutoFolds(ratings_file='./ml-latest-small/ratings_noh.csv', reader=reader)
trainset = data_folds.build_full_trainset()

# %% Fit the final model
# n_epochs=20 / n_factors=50 are the values the grid search reported in the
# recorded run.
algo = SVD(n_epochs=20, n_factors=50, random_state=0)
algo.fit(trainset)

# %% Check that user 9 has not rated movie 42, and show that movie
movies = pd.read_csv('./ml-latest-small/movies.csv')
movieIds = ratings[ratings['userId'] == 9]['movieId']
if movieIds[movieIds == 42].count() == 0:
    print('사용자 아이디 9는 영화 아이디 42의 평점 없음')

print(movies[movies['movieId'] == 42])

# %% Predict that single user/item pair
# The trainset was read from a text file, so raw ids are passed as strings.
uid = str(9)
iid = str(42)

pred = algo.predict(uid, iid, verbose=True)
"f32dfeee-4ef8-4928-c3e3-53faf63cb781" + }, + "execution_count": 53, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "user: 9 item: 42 r_ui = None est = 3.13 {'was_impossible': False}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def get_unseen_surprise(ratings, movies, userId):\n", + " seen_movies = ratings[ratings['userId'] == userId]['movieId'].tolist()\n", + "\n", + " total_movies = movies['movieId'].tolist()\n", + "\n", + " unseen_movies = [movie for movie in total_movies if movie not in seen_movies]\n", + " print('평점 매긴 영화 수:', len(seen_movies), '추천 대상 영화 수:', len(unseen_movies), '전체 영화 수:', len(total_movies))\n", + "\n", + " return unseen_movies\n", + "\n", + "unseen_movies = get_unseen_surprise(ratings, movies, 9)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qh7MndvdSd4Z", + "outputId": "13c1b426-389c-4c66-a4a1-07ddc5597b0c" + }, + "execution_count": 55, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "평점 매긴 영화 수: 46 추천 대상 영화 수: 9696 전체 영화 수: 9742\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def recomm_movie_by_surprise(algo, userId, unseen_movies, top_n=10):\n", + " predictions = [algo.predict(str(userId), str(movieId)) for movieId in unseen_movies]\n", + "\n", + " def sortkey_est(pred):\n", + " return pred.est\n", + "\n", + " predictions.sort(key=sortkey_est, reverse=True)\n", + " top_predictions=predictions[:top_n]\n", + "\n", + " top_movie_ids = [int(pred.iid) for pred in top_predictions]\n", + " top_movie_rating = [pred.est for pred in top_predictions]\n", + " top_movie_titles = movies[movies.movieId.isin(top_movie_ids)]['title']\n", + " top_movie_preds = [(id, title, rating) for id, title, rating in zip(top_movie_ids, top_movie_titles, top_movie_rating)]\n", + "\n", + " return top_movie_preds\n", + "\n", + "unseen_movies = get_unseen_surprise(ratings, movies, 9)\n", + "top_movie_preds = 
recomm_movie_by_surprise(algo, 9, unseen_movies, top_n=10)\n", + "\n", + "print('#### Top-10 추천 영화 리스트 ####')\n", + "for top_movie in top_movie_preds:\n", + " print(top_movie[1], \":\", top_movie[2])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bFV5HPqMS-kc", + "outputId": "8ca69f68-0b12-4031-b723-4d47d089f8ee" + }, + "execution_count": 56, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "평점 매긴 영화 수: 46 추천 대상 영화 수: 9696 전체 영화 수: 9742\n", + "#### Top-10 추천 영화 리스트 ####\n", + "Usual Suspects, The (1995) : 4.306302135700814\n", + "Star Wars: Episode IV - A New Hope (1977) : 4.281663842987387\n", + "Pulp Fiction (1994) : 4.278152632122759\n", + "Silence of the Lambs, The (1991) : 4.226073566460876\n", + "Godfather, The (1972) : 4.1918097904381995\n", + "Streetcar Named Desire, A (1951) : 4.154746591122657\n", + "Star Wars: Episode V - The Empire Strikes Back (1980) : 4.122016128534504\n", + "Star Wars: Episode VI - Return of the Jedi (1983) : 4.108009609093436\n", + "Goodfellas (1990) : 4.083464936588478\n", + "Glory (1989) : 4.07887165526957\n" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 1cdf163..0000000 --- a/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# 9th-ML🐢 ```ML 세션``` - -## 팀 구성 - -|팀|팀원| | | -|---|---|---|---| -|**팀 1**|박혜린|엄지민|조승연| -|**팀 2**|강민서|이가은|박나림| -|**팀 3**|권혜수|조한희|이시현| -|**팀 4**|진규빈|김두현|노현선| - -## Curriculum -|주차|날짜|내용|발표자|발표 자료| -|:-:|:---:|---------------|:---:|:-:| -|-|09/02|**OT**||| -|1주차|09/09|파머완 1장|1팀|| -|2주차|09/16|파머완 2장, 3장|2팀|| -|3주차|09/23|파머완 4장 - Part 1(4.1장 ~ 4.4장)|3팀|| -|4주차|09/30|파머완 4장 - Part 2(4.5장 ~ 4.8장, 4.10장 ~ 4.11장)|4팀|| -|5주차|10/07|분류 실습(파머완 4.9, 4.10장 + Kaggle 필사)|1팀|| -|6주차|10/14|중간고사 휴식 - 1||| -|7주차|10/21|중간고사 휴식 - 2||| -|8주차|10/28|파머완 5장(5.9, 5.10장 제외)|2팀|| -|9주차|11/04|회귀 실습(파머완 5.9, 5.10장 + Kaggle 필사)|3팀|| -|10주차|11/11|파머완 6장|4팀|| -|11주차|11/18|파머완 7장|1팀|| -|12주차|11/25|파머완 8장 - 
Part 1(8.1 ~ 8.3, 8.5장)|2팀|| -|13주차|12/02|파머완 8장 - Part 2(8.6 ~ 8.9장)|3팀|| -|14주차|12/09|기말고사 휴식기간 - 1||| -|15주차|12/16|기말고사 휴식기간 - 2||| -|16주차|12/23|파머완 9장 + **아이데이션**|4팀|| -|17주차|12/30|프로젝트 주간 1||| -|18주차|01/06|프로젝트 주간 2||| -|19주차|01/13|프로젝트 주간 3||| -|20주차|01/20|프로젝트 주간 4||| -|21주차|01/27|프로젝트 주간 5||| -|22주차|02/03|프로젝트 주간 6||| -|23주차|02/10|수료식||| diff --git a/week16-9-5.ipynb b/week16-9-5.ipynb new file mode 100644 index 0000000..5332db7 --- /dev/null +++ b/week16-9-5.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":14328567,"sourceType":"datasetVersion","datasetId":9147598},{"sourceId":14328657,"sourceType":"datasetVersion","datasetId":9147665},{"sourceId":90985619,"sourceType":"kernelVersion"}],"dockerImageVersionId":31239,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"##### This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2025-12-29T07:23:24.224285Z","iopub.execute_input":"2025-12-29T07:23:24.224661Z","iopub.status.idle":"2025-12-29T07:23:24.242383Z","shell.execute_reply.started":"2025-12-29T07:23:24.224634Z","shell.execute_reply":"2025-12-29T07:23:24.241544Z"},"trusted":true},"outputs":[{"name":"stdout","text":"/kaggle/input/tmbd-5000-credit/tmdb_5000_credits.csv\n/kaggle/input/tmbd-movies/tmdb_5000_movies.csv\n/kaggle/input/tmdb-5000-movie-dataset/__results__.html\n/kaggle/input/tmdb-5000-movie-dataset/__resultx__.html\n/kaggle/input/tmdb-5000-movie-dataset/__notebook__.ipynb\n/kaggle/input/tmdb-5000-movie-dataset/__output__.json\n/kaggle/input/tmdb-5000-movie-dataset/custom.css\n/kaggle/input/tmdb-5000-movie-dataset/__results___files/__results___47_0.png\n/kaggle/input/tmdb-5000-movie-dataset/__results___files/__results___45_0.png\n/kaggle/input/tmdb-5000-movie-dataset/__results___files/__results___51_0.png\n/kaggle/input/tmdb-5000-movie-dataset/__results___files/__results___12_0.png\n","output_type":"stream"}],"execution_count":16},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport warnings; warnings.filterwarnings('ignore')\n\nmovies = 
pd.read_csv('/kaggle/input/tmbd-movies/tmdb_5000_movies.csv')\nprint(movies.shape)\nmovies.head(1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:23:40.220053Z","iopub.execute_input":"2025-12-29T07:23:40.220368Z","iopub.status.idle":"2025-12-29T07:23:40.383603Z","shell.execute_reply.started":"2025-12-29T07:23:40.220344Z","shell.execute_reply":"2025-12-29T07:23:40.382532Z"}},"outputs":[{"name":"stdout","text":"(4803, 20)\n","output_type":"stream"},{"execution_count":19,"output_type":"execute_result","data":{"text/plain":" budget \\\n0 237000000 \n\n genres \\\n0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {... \n\n homepage id \\\n0 http://www.avatarmovie.com/ 19995 \n\n keywords \\\n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \"sp... \n\n original_language original_title \\\n0 en Avatar \n\n overview \\\n0 In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, ... \n\n popularity \\\n0 150.437577 \n\n production_companies \\\n0 [{\"name\": \"Ingenious Film Partners\", \"id\": 289}, {\"name\": \"Twentieth Century Fox Film Corporatio... \n\n production_countries \\\n0 [{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}, {\"iso_3166_1\": \"GB\", \"name\": \"United ... \n\n release_date revenue runtime \\\n0 2009-12-10 2787965087 162.0 \n\n spoken_languages \\\n0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}] \n\n status tagline title vote_average vote_count \n0 Released Enter the World of Pandora. Avatar 7.2 11800 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
budgetgenreshomepageidkeywordsoriginal_languageoriginal_titleoverviewpopularityproduction_companiesproduction_countriesrelease_daterevenueruntimespoken_languagesstatustaglinetitlevote_averagevote_count
0237000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {...http://www.avatarmovie.com/19995[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \"sp...enAvatarIn the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, ...150.437577[{\"name\": \"Ingenious Film Partners\", \"id\": 289}, {\"name\": \"Twentieth Century Fox Film Corporatio...[{\"iso_3166_1\": \"US\", \"name\": \"United States of America\"}, {\"iso_3166_1\": \"GB\", \"name\": \"United ...2009-12-102787965087162.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]ReleasedEnter the World of Pandora.Avatar7.211800
\n
"},"metadata":{}}],"execution_count":19},{"cell_type":"code","source":"movies_df = movies[['id', 'title', 'genres', 'vote_average', 'vote_count', 'popularity', 'keywords', 'overview']]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:24:02.644368Z","iopub.execute_input":"2025-12-29T07:24:02.645043Z","iopub.status.idle":"2025-12-29T07:24:02.651149Z","shell.execute_reply.started":"2025-12-29T07:24:02.645010Z","shell.execute_reply":"2025-12-29T07:24:02.650240Z"}},"outputs":[],"execution_count":22},{"cell_type":"code","source":"pd.set_option('max_colwidth', 100)\nmovies_df[['genres', 'keywords']][:1]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:23:59.455690Z","iopub.execute_input":"2025-12-29T07:23:59.456048Z","iopub.status.idle":"2025-12-29T07:23:59.465076Z","shell.execute_reply.started":"2025-12-29T07:23:59.456022Z","shell.execute_reply":"2025-12-29T07:23:59.464364Z"}},"outputs":[{"execution_count":21,"output_type":"execute_result","data":{"text/plain":" genres \\\n0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {... \n\n keywords \n0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \"sp... ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
genreskeywords
0[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {...[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \"sp...
\n
"},"metadata":{}}],"execution_count":21},{"cell_type":"code","source":"from ast import literal_eval\nmovies_df['genres'] = movies_df['genres'].apply(literal_eval)\nmovies_df['keywords'] = movies_df['keywords'].apply(literal_eval)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:25:23.227726Z","iopub.execute_input":"2025-12-29T07:25:23.228093Z","iopub.status.idle":"2025-12-29T07:25:23.834588Z","shell.execute_reply.started":"2025-12-29T07:25:23.228066Z","shell.execute_reply":"2025-12-29T07:25:23.833752Z"}},"outputs":[],"execution_count":23},{"cell_type":"code","source":"movies_df['genres'] = movies_df['genres'].apply(lambda x:[y['name'] for y in x])\nmovies_df['keywords'] = movies_df['keywords'].apply(lambda x : [y['name'] for y in x])\nmovies_df[['genres', 'keywords']][:1]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:27:02.356995Z","iopub.execute_input":"2025-12-29T07:27:02.357784Z","iopub.status.idle":"2025-12-29T07:27:02.386181Z","shell.execute_reply.started":"2025-12-29T07:27:02.357757Z","shell.execute_reply":"2025-12-29T07:27:02.385290Z"}},"outputs":[{"execution_count":24,"output_type":"execute_result","data":{"text/plain":" genres \\\n0 [Action, Adventure, Fantasy, Science Fiction] \n\n keywords \n0 [culture clash, future, space war, space colony, society, space travel, futuristic, romance, spa... ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
genreskeywords
0[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colony, society, space travel, futuristic, romance, spa...
\n
"},"metadata":{}}],"execution_count":24},{"cell_type":"code","source":"from sklearn.metrics.pairwise import cosine_similarity\n\ngenre_sim = cosine_similarity(genre_mat, genre_mat)\nprint(genre_sim.shape)\nprint(genre_sim[:2])","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:31:40.621309Z","iopub.execute_input":"2025-12-29T07:31:40.621600Z","iopub.status.idle":"2025-12-29T07:31:41.329369Z","shell.execute_reply.started":"2025-12-29T07:31:40.621572Z","shell.execute_reply":"2025-12-29T07:31:41.328496Z"}},"outputs":[{"name":"stdout","text":"(4803, 4803)\n[[1. 0.59628479 0.4472136 ... 0. 0. 0. ]\n [0.59628479 1. 0.4 ... 0. 0. 0. ]]\n","output_type":"stream"}],"execution_count":29},{"cell_type":"code","source":"from sklearn.feature_extraction.text import CountVectorizer\n\nmovies_df['genres_literal'] = movies_df['genres'].apply(lambda x :(' ').join(x))\ncount_vect = CountVectorizer(min_df=0, ngram_range=(1,2))\ngenre_mat=count_vect.fit_transform(movies_df['genres_literal'])\nprint(genre_mat.shape)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:29:27.704647Z","iopub.execute_input":"2025-12-29T07:29:27.705008Z","iopub.status.idle":"2025-12-29T07:29:27.766696Z","shell.execute_reply.started":"2025-12-29T07:29:27.704980Z","shell.execute_reply":"2025-12-29T07:29:27.765863Z"}},"outputs":[{"name":"stdout","text":"(4803, 276)\n","output_type":"stream"}],"execution_count":27},{"cell_type":"code","source":"genre_sim_sorted_ind = genre_sim.argsort()[:, ::-1]\nprint(genre_sim_sorted_ind[:1])","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:31:45.913965Z","iopub.execute_input":"2025-12-29T07:31:45.914287Z","iopub.status.idle":"2025-12-29T07:31:46.629725Z","shell.execute_reply.started":"2025-12-29T07:31:45.914263Z","shell.execute_reply":"2025-12-29T07:31:46.628836Z"}},"outputs":[{"name":"stdout","text":"[[ 0 3494 813 ... 
3038 3037 2401]]\n","output_type":"stream"}],"execution_count":30},{"cell_type":"code","source":"def find_sim_movie(df, sorted_ind, title_name, top_n=10):\n title_movie = df[df['title']==title_name]\n title_index = title_movie.index.values\n similar_indexes = sorted_ind[title_index, :(top_n)]\n print(similar_indexes)\n similar_indexes = similar_indexes.reshape(-1)\n return df.iloc[similar_indexes]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:35:18.094569Z","iopub.execute_input":"2025-12-29T07:35:18.094932Z","iopub.status.idle":"2025-12-29T07:35:18.100858Z","shell.execute_reply.started":"2025-12-29T07:35:18.094907Z","shell.execute_reply":"2025-12-29T07:35:18.099679Z"}},"outputs":[],"execution_count":31},{"cell_type":"code","source":"similar_movies = find_sim_movie(movies_df, genre_sim_sorted_ind, 'The Godfather', 10)\nsimilar_movies[['title', 'vote_average']]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:40:48.095539Z","iopub.execute_input":"2025-12-29T07:40:48.095979Z","iopub.status.idle":"2025-12-29T07:40:48.109274Z","shell.execute_reply.started":"2025-12-29T07:40:48.095954Z","shell.execute_reply":"2025-12-29T07:40:48.108185Z"}},"outputs":[{"name":"stdout","text":"[[2731 1243 3636 1946 2640 4065 1847 4217 883 3866]]\n","output_type":"stream"},{"execution_count":32,"output_type":"execute_result","data":{"text/plain":" title vote_average\n2731 The Godfather: Part II 8.3\n1243 Mean Streets 7.2\n3636 Light Sleeper 5.7\n1946 The Bad Lieutenant: Port of Call - New Orleans 6.0\n2640 Things to Do in Denver When You're Dead 6.7\n4065 Mi America 0.0\n1847 GoodFellas 8.2\n4217 Kids 6.8\n883 Catch Me If You Can 7.7\n3866 City of God 8.1","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
titlevote_average
2731The Godfather: Part II8.3
1243Mean Streets7.2
3636Light Sleeper5.7
1946The Bad Lieutenant: Port of Call - New Orleans6.0
2640Things to Do in Denver When You're Dead6.7
4065Mi America0.0
1847GoodFellas8.2
4217Kids6.8
883Catch Me If You Can7.7
3866City of God8.1
\n
"},"metadata":{}}],"execution_count":32},{"cell_type":"code","source":"movies_df[['title', 'vote_average', 'vote_count']].sort_values('vote_average', ascending=False)[:10]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:41:35.288768Z","iopub.execute_input":"2025-12-29T07:41:35.289120Z","iopub.status.idle":"2025-12-29T07:41:35.302801Z","shell.execute_reply.started":"2025-12-29T07:41:35.289096Z","shell.execute_reply":"2025-12-29T07:41:35.301975Z"}},"outputs":[{"execution_count":33,"output_type":"execute_result","data":{"text/plain":" title vote_average vote_count\n3519 Stiff Upper Lips 10.0 1\n4247 Me You and Five Bucks 10.0 2\n4045 Dancer, Texas Pop. 81 10.0 1\n4662 Little Big Top 10.0 1\n3992 Sardaarji 9.5 2\n2386 One Man's Hero 9.3 2\n2970 There Goes My Baby 8.5 2\n1881 The Shawshank Redemption 8.5 8205\n2796 The Prisoner of Zenda 8.4 11\n3337 The Godfather 8.4 5893","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
titlevote_averagevote_count
3519Stiff Upper Lips10.01
4247Me You and Five Bucks10.02
4045Dancer, Texas Pop. 8110.01
4662Little Big Top10.01
3992Sardaarji9.52
2386One Man's Hero9.32
2970There Goes My Baby8.52
1881The Shawshank Redemption8.58205
2796The Prisoner of Zenda8.411
3337The Godfather8.45893
\n
"},"metadata":{}}],"execution_count":33},{"cell_type":"code","source":"C = movies_df['vote_average'].mean()\nm= movies_df['vote_count'].quantile(0.6)\nprint('C:', round(C, 3), 'm:', round(m, 3))","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:42:41.309904Z","iopub.execute_input":"2025-12-29T07:42:41.310256Z","iopub.status.idle":"2025-12-29T07:42:41.331296Z","shell.execute_reply.started":"2025-12-29T07:42:41.310231Z","shell.execute_reply":"2025-12-29T07:42:41.330169Z"}},"outputs":[{"name":"stdout","text":"C: 6.092 m: 370.2\n","output_type":"stream"}],"execution_count":34},{"cell_type":"code","source":"percentile = 0.6\nm = movies_df['vote_count'].quantile(percentile)\nC = movies_df['vote_average'].mean()\n\ndef weighted_vote_average(record):\n v = record['vote_count']\n R = record['vote_average']\n return((v/(v+m)*R) + ((m/(m+v)))*C)\n\nmovies_df['weighted_vote'] = movies.apply(weighted_vote_average, axis=1)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:44:46.243682Z","iopub.execute_input":"2025-12-29T07:44:46.244647Z","iopub.status.idle":"2025-12-29T07:44:46.298046Z","shell.execute_reply.started":"2025-12-29T07:44:46.244543Z","shell.execute_reply":"2025-12-29T07:44:46.297074Z"}},"outputs":[],"execution_count":35},{"cell_type":"code","source":"movies_df[['title', 'vote_average', 'weighted_vote', 'vote_count']].sort_values('weighted_vote', ascending=False)[:10]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:45:26.947476Z","iopub.execute_input":"2025-12-29T07:45:26.947783Z","iopub.status.idle":"2025-12-29T07:45:26.963034Z","shell.execute_reply.started":"2025-12-29T07:45:26.947758Z","shell.execute_reply":"2025-12-29T07:45:26.961934Z"}},"outputs":[{"execution_count":37,"output_type":"execute_result","data":{"text/plain":" title vote_average weighted_vote vote_count\n1881 The Shawshank Redemption 8.5 8.396052 8205\n3337 The Godfather 8.4 8.263591 5893\n662 Fight Club 8.3 8.216455 
9413\n3232 Pulp Fiction 8.3 8.207102 8428\n65 The Dark Knight 8.2 8.136930 12002\n1818 Schindler's List 8.3 8.126069 4329\n3865 Whiplash 8.3 8.123248 4254\n809 Forrest Gump 8.2 8.105954 7927\n2294 Spirited Away 8.3 8.105867 3840\n2731 The Godfather: Part II 8.3 8.079586 3338","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
titlevote_averageweighted_votevote_count
1881The Shawshank Redemption8.58.3960528205
3337The Godfather8.48.2635915893
662Fight Club8.38.2164559413
3232Pulp Fiction8.38.2071028428
65The Dark Knight8.28.13693012002
1818Schindler's List8.38.1260694329
3865Whiplash8.38.1232484254
809Forrest Gump8.28.1059547927
2294Spirited Away8.38.1058673840
2731The Godfather: Part II8.38.0795863338
\n
"},"metadata":{}}],"execution_count":37},{"cell_type":"code","source":"def find_sim_movie(df, sorted_ind, title_name, top_n=10):\n title_movie = df[df['title']==title_name]\n title_index = title_movie.index.values\n similar_indexes = sorted_ind[title_index, :(top_n*2)]\n similar_indexes = similar_indexes.reshape(-1)\n similar_indexes = similar_indexes[similar_indexes != title_index]\n return df.iloc[similar_indexes].sort_values('weighted_vote', ascending=False)[:top_n]\n\nsimilar_movies = find_sim_movie(movies_df, genre_sim_sorted_ind, 'The Godfather', 10)\nsimilar_movies[['title', 'vote_average', 'weighted_vote']]","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T07:50:14.758426Z","iopub.execute_input":"2025-12-29T07:50:14.759150Z","iopub.status.idle":"2025-12-29T07:50:14.774674Z","shell.execute_reply.started":"2025-12-29T07:50:14.759119Z","shell.execute_reply":"2025-12-29T07:50:14.773661Z"}},"outputs":[{"execution_count":38,"output_type":"execute_result","data":{"text/plain":" title vote_average weighted_vote\n2731 The Godfather: Part II 8.3 8.079586\n1847 GoodFellas 8.2 7.976937\n3866 City of God 8.1 7.759693\n1663 Once Upon a Time in America 8.2 7.657811\n883 Catch Me If You Can 7.7 7.557097\n281 American Gangster 7.4 7.141396\n4041 This Is England 7.4 6.739664\n1149 American Hustle 6.8 6.717525\n1243 Mean Streets 7.2 6.626569\n2839 Rounders 6.9 6.530427","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
titlevote_averageweighted_vote
2731The Godfather: Part II8.38.079586
1847GoodFellas8.27.976937
3866City of God8.17.759693
1663Once Upon a Time in America8.27.657811
883Catch Me If You Can7.77.557097
281American Gangster7.47.141396
4041This Is England7.46.739664
1149American Hustle6.86.717525
1243Mean Streets7.26.626569
2839Rounders6.96.530427
\n
"},"metadata":{}}],"execution_count":38}]} \ No newline at end of file diff --git a/week16-9-6.ipynb b/week16-9-6.ipynb new file mode 100644 index 0000000..c028fc4 --- /dev/null +++ b/week16-9-6.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":14330189,"sourceType":"datasetVersion","datasetId":9148797}],"dockerImageVersionId":31239,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2025-12-29T10:02:48.909660Z","iopub.execute_input":"2025-12-29T10:02:48.909958Z","iopub.status.idle":"2025-12-29T10:02:51.396647Z","shell.execute_reply.started":"2025-12-29T10:02:48.909931Z","shell.execute_reply":"2025-12-29T10:02:51.395552Z"}},"outputs":[{"name":"stdout","text":"/kaggle/input/ml-latest-small/ml-latest-small/movies.csv\n/kaggle/input/ml-latest-small/ml-latest-small/ratings.csv\n/kaggle/input/ml-latest-small/ml-latest-small/README.txt\n/kaggle/input/ml-latest-small/ml-latest-small/tags.csv\n/kaggle/input/ml-latest-small/ml-latest-small/links.csv\n","output_type":"stream"}],"execution_count":1},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\n\nmovies = pd.read_csv('/kaggle/input/ml-latest-small/ml-latest-small/movies.csv')\nratings = 
pd.read_csv('/kaggle/input/ml-latest-small/ml-latest-small/ratings.csv')\nprint(movies.shape)\nprint(ratings.shape)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T10:04:27.521860Z","iopub.execute_input":"2025-12-29T10:04:27.522220Z","iopub.status.idle":"2025-12-29T10:04:27.670739Z","shell.execute_reply.started":"2025-12-29T10:04:27.522193Z","shell.execute_reply":"2025-12-29T10:04:27.669671Z"}},"outputs":[{"name":"stdout","text":"(9742, 3)\n(100836, 4)\n","output_type":"stream"}],"execution_count":2},{"cell_type":"code","source":"ratings = ratings[['userId', 'movieId', 'rating']]\nratings_matrix = ratings.pivot_table('rating', index='userId', columns='movieId')\nratings_matrix.head(3)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T10:05:51.335458Z","iopub.execute_input":"2025-12-29T10:05:51.335763Z","iopub.status.idle":"2025-12-29T10:05:51.533334Z","shell.execute_reply.started":"2025-12-29T10:05:51.335738Z","shell.execute_reply":"2025-12-29T10:05:51.532525Z"}},"outputs":[{"name":"stderr","text":"/usr/local/lib/python3.11/dist-packages/pandas/io/formats/format.py:1458: RuntimeWarning: invalid value encountered in greater\n has_large_values = (abs_vals > 1e6).any()\n/usr/local/lib/python3.11/dist-packages/pandas/io/formats/format.py:1459: RuntimeWarning: invalid value encountered in less\n has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()\n/usr/local/lib/python3.11/dist-packages/pandas/io/formats/format.py:1459: RuntimeWarning: invalid value encountered in greater\n has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()\n","output_type":"stream"},{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"movieId 1 2 3 4 5 6 7 8 \\\nuserId \n1 4.0 NaN 4.0 NaN NaN 4.0 NaN NaN \n2 NaN NaN NaN NaN NaN NaN NaN NaN \n3 NaN NaN NaN NaN NaN NaN NaN NaN \n\nmovieId 9 10 ... 193565 193567 193571 193573 193579 193581 \\\nuserId ... \n1 NaN NaN ... 
NaN NaN NaN NaN NaN NaN \n2 NaN NaN ... NaN NaN NaN NaN NaN NaN \n3 NaN NaN ... NaN NaN NaN NaN NaN NaN \n\nmovieId 193583 193585 193587 193609 \nuserId \n1 NaN NaN NaN NaN \n2 NaN NaN NaN NaN \n3 NaN NaN NaN NaN \n\n[3 rows x 9724 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
movieId12345678910...193565193567193571193573193579193581193583193585193587193609
userId
14.0NaN4.0NaNNaN4.0NaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n

3 rows × 9724 columns

\n
"},"metadata":{}}],"execution_count":4},{"cell_type":"code","source":"rating_movies = pd.merge(ratings, movies, on='movieId')\n\nratings_matrix = rating_movies.pivot_table('rating', index='userId', columns='title')\nratings_matrix = ratings_matrix.fillna(0)\nratings_matrix.head(3)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T10:07:14.151292Z","iopub.execute_input":"2025-12-29T10:07:14.151599Z","iopub.status.idle":"2025-12-29T10:07:14.404792Z","shell.execute_reply.started":"2025-12-29T10:07:14.151576Z","shell.execute_reply":"2025-12-29T10:07:14.404009Z"}},"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":"title '71 (2014) 'Hellboy': The Seeds of Creation (2004) \\\nuserId \n1 0.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n\ntitle 'Round Midnight (1986) 'Salem's Lot (2004) \\\nuserId \n1 0.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n\ntitle 'Til There Was You (1997) 'Tis the Season for Love (2015) \\\nuserId \n1 0.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n\ntitle 'burbs, The (1989) 'night Mother (1986) (500) Days of Summer (2009) \\\nuserId \n1 0.0 0.0 0.0 \n2 0.0 0.0 0.0 \n3 0.0 0.0 0.0 \n\ntitle *batteries not included (1987) ... Zulu (2013) [REC] (2007) \\\nuserId ... \n1 0.0 ... 0.0 0.0 \n2 0.0 ... 0.0 0.0 \n3 0.0 ... 0.0 0.0 \n\ntitle [REC]² (2009) [REC]³ 3 Génesis (2012) \\\nuserId \n1 0.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n\ntitle anohana: The Flower We Saw That Day - The Movie (2013) \\\nuserId \n1 0.0 \n2 0.0 \n3 0.0 \n\ntitle eXistenZ (1999) xXx (2002) xXx: State of the Union (2005) \\\nuserId \n1 0.0 0.0 0.0 \n2 0.0 0.0 0.0 \n3 0.0 0.0 0.0 \n\ntitle ¡Three Amigos! (1986) À nous la liberté (Freedom for Us) (1931) \nuserId \n1 4.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n\n[3 rows x 9719 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
title'71 (2014)'Hellboy': The Seeds of Creation (2004)'Round Midnight (1986)'Salem's Lot (2004)'Til There Was You (1997)'Tis the Season for Love (2015)'burbs, The (1989)'night Mother (1986)(500) Days of Summer (2009)*batteries not included (1987)...Zulu (2013)[REC] (2007)[REC]² (2009)[REC]³ 3 Génesis (2012)anohana: The Flower We Saw That Day - The Movie (2013)eXistenZ (1999)xXx (2002)xXx: State of the Union (2005)¡Three Amigos! (1986)À nous la liberté (Freedom for Us) (1931)
userId
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.04.00.0
20.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
30.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n

3 rows × 9719 columns

\n
"},"metadata":{}}],"execution_count":5},{"cell_type":"code","source":"ratings_matrix_T = ratings_matrix.transpose()\nratings_matrix_T.head(3)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T10:07:58.911928Z","iopub.execute_input":"2025-12-29T10:07:58.912276Z","iopub.status.idle":"2025-12-29T10:07:58.935441Z","shell.execute_reply.started":"2025-12-29T10:07:58.912251Z","shell.execute_reply":"2025-12-29T10:07:58.934523Z"}},"outputs":[{"execution_count":6,"output_type":"execute_result","data":{"text/plain":"userId 1 2 3 4 5 6 7 \\\ntitle \n'71 (2014) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n'Round Midnight (1986) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n\nuserId 8 9 10 ... 601 602 603 \\\ntitle ... \n'71 (2014) 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n'Round Midnight (1986) 0.0 0.0 0.0 ... 0.0 0.0 0.0 \n\nuserId 604 605 606 607 608 609 610 \ntitle \n'71 (2014) 0.0 0.0 0.0 0.0 0.0 0.0 4.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n'Round Midnight (1986) 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n\n[3 rows x 610 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
userId12345678910...601602603604605606607608609610
title
'71 (2014)0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.04.0
'Hellboy': The Seeds of Creation (2004)0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
'Round Midnight (1986)0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n

3 rows × 610 columns

\n
"},"metadata":{}}],"execution_count":6},{"cell_type":"code","source":"from sklearn.metrics.pairwise import cosine_similarity\n\nitem_sim = cosine_similarity(ratings_matrix_T, ratings_matrix_T)\n\nitem_sim_df = pd.DataFrame(data=item_sim, index=ratings_matrix.columns, columns=ratings_matrix.columns)\nprint(item_sim_df.shape)\nitem_sim_df.head(3)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-12-29T10:10:26.148797Z","iopub.execute_input":"2025-12-29T10:10:26.149158Z","iopub.status.idle":"2025-12-29T10:10:28.618007Z","shell.execute_reply.started":"2025-12-29T10:10:26.149130Z","shell.execute_reply":"2025-12-29T10:10:28.617012Z"}},"outputs":[{"name":"stdout","text":"(9719, 9719)\n","output_type":"stream"},{"execution_count":7,"output_type":"execute_result","data":{"text/plain":"title '71 (2014) \\\ntitle \n'71 (2014) 1.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle 'Hellboy': The Seeds of Creation (2004) \\\ntitle \n'71 (2014) 0.000000 \n'Hellboy': The Seeds of Creation (2004) 1.000000 \n'Round Midnight (1986) 0.707107 \n\ntitle 'Round Midnight (1986) \\\ntitle \n'71 (2014) 0.000000 \n'Hellboy': The Seeds of Creation (2004) 0.707107 \n'Round Midnight (1986) 1.000000 \n\ntitle 'Salem's Lot (2004) \\\ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle 'Til There Was You (1997) \\\ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle 'Tis the Season for Love (2015) \\\ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle 'burbs, The (1989) \\\ntitle \n'71 (2014) 0.000000 \n'Hellboy': The Seeds of Creation (2004) 0.000000 \n'Round Midnight (1986) 0.176777 \n\ntitle 'night Mother (1986) \\\ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle (500) Days of Summer (2009) \\\ntitle \n'71 (2014) 
0.141653 \n'Hellboy': The Seeds of Creation (2004) 0.000000 \n'Round Midnight (1986) 0.000000 \n\ntitle *batteries not included (1987) ... \\\ntitle ... \n'71 (2014) 0.0 ... \n'Hellboy': The Seeds of Creation (2004) 0.0 ... \n'Round Midnight (1986) 0.0 ... \n\ntitle Zulu (2013) [REC] (2007) \\\ntitle \n'71 (2014) 0.0 0.342055 \n'Hellboy': The Seeds of Creation (2004) 0.0 0.000000 \n'Round Midnight (1986) 0.0 0.000000 \n\ntitle [REC]² (2009) \\\ntitle \n'71 (2014) 0.543305 \n'Hellboy': The Seeds of Creation (2004) 0.000000 \n'Round Midnight (1986) 0.000000 \n\ntitle [REC]³ 3 Génesis (2012) \\\ntitle \n'71 (2014) 0.707107 \n'Hellboy': The Seeds of Creation (2004) 0.000000 \n'Round Midnight (1986) 0.000000 \n\ntitle anohana: The Flower We Saw That Day - The Movie (2013) \\\ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle eXistenZ (1999) xXx (2002) \\\ntitle \n'71 (2014) 0.0 0.139431 \n'Hellboy': The Seeds of Creation (2004) 0.0 0.000000 \n'Round Midnight (1986) 0.0 0.000000 \n\ntitle xXx: State of the Union (2005) \\\ntitle \n'71 (2014) 0.327327 \n'Hellboy': The Seeds of Creation (2004) 0.000000 \n'Round Midnight (1986) 0.000000 \n\ntitle ¡Three Amigos! (1986) \\\ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\ntitle À nous la liberté (Freedom for Us) (1931) \ntitle \n'71 (2014) 0.0 \n'Hellboy': The Seeds of Creation (2004) 0.0 \n'Round Midnight (1986) 0.0 \n\n[3 rows x 9719 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
title'71 (2014)'Hellboy': The Seeds of Creation (2004)'Round Midnight (1986)'Salem's Lot (2004)'Til There Was You (1997)'Tis the Season for Love (2015)'burbs, The (1989)'night Mother (1986)(500) Days of Summer (2009)*batteries not included (1987)...Zulu (2013)[REC] (2007)[REC]² (2009)[REC]³ 3 Génesis (2012)anohana: The Flower We Saw That Day - The Movie (2013)eXistenZ (1999)xXx (2002)xXx: State of the Union (2005)¡Three Amigos! (1986)À nous la liberté (Freedom for Us) (1931)
title
'71 (2014)1.00.0000000.0000000.00.00.00.0000000.00.1416530.0...0.00.3420550.5433050.7071070.00.00.1394310.3273270.00.0
'Hellboy': The Seeds of Creation (2004)0.01.0000000.7071070.00.00.00.0000000.00.0000000.0...0.00.0000000.0000000.0000000.00.00.0000000.0000000.00.0
'Round Midnight (1986)0.00.7071071.0000000.00.00.00.1767770.00.0000000.0...0.00.0000000.0000000.0000000.00.00.0000000.0000000.00.0
\n

3 rows × 9719 columns

\n
def predict_rating(ratings_arr, item_sim_arr):
    """Predict every user/item rating as a similarity-weighted average.

    For user ``u`` and target item ``j`` the prediction is
    ``sum_i(ratings_arr[u, i] * item_sim_arr[i, j]) / sum_i(|item_sim_arr[i, j]|)``.

    Parameters
    ----------
    ratings_arr : 2-D array, shape (n_users, n_items)
        User-by-item rating matrix (unrated entries are expected to be 0).
    item_sim_arr : 2-D array, shape (n_items, n_items)
        Item-to-item similarity matrix.

    Returns
    -------
    2-D float array, shape (n_users, n_items)
        Predicted ratings. Columns whose total absolute similarity is 0 are
        filled with 0.0 instead of NaN.
    """
    weighted_sum = ratings_arr.dot(item_sim_arr)

    # Column j of `weighted_sum` aggregates over the rows of `item_sim_arr`,
    # so the matching normaliser is the column-wise (axis=0) sum. For a
    # symmetric similarity matrix this equals the row-wise sum; for any other
    # matrix only axis=0 gives a true weighted average.
    sim_norm = np.abs(item_sim_arr).sum(axis=0)

    # Skip the division where the normaliser is 0 so those entries stay 0.0
    # rather than becoming NaN (0/0) and poisoning downstream metrics.
    ratings_pred = np.divide(
        weighted_sum,
        sim_norm,
        out=np.zeros(weighted_sum.shape, dtype=float),
        where=sim_norm != 0,
    )
    return ratings_pred
0.006923 0.011665 \n\ntitle [REC]² (2009) [REC]³ 3 Génesis (2012) \\\nuserId \n1 0.133962 0.128574 \n2 0.020119 0.015745 \n3 0.011800 0.012225 \n\ntitle anohana: The Flower We Saw That Day - The Movie (2013) \\\nuserId \n1 0.006179 \n2 0.049983 \n3 0.000000 \n\ntitle eXistenZ (1999) xXx (2002) xXx: State of the Union (2005) \\\nuserId \n1 0.212070 0.192921 0.136024 \n2 0.014876 0.021616 0.024528 \n3 0.008194 0.007017 0.009229 \n\ntitle ¡Three Amigos! (1986) À nous la liberté (Freedom for Us) (1931) \nuserId \n1 0.292955 0.720347 \n2 0.017563 0.000000 \n3 0.010420 0.084501 \n\n[3 rows x 9719 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
title'71 (2014)'Hellboy': The Seeds of Creation (2004)'Round Midnight (1986)'Salem's Lot (2004)'Til There Was You (1997)'Tis the Season for Love (2015)'burbs, The (1989)'night Mother (1986)(500) Days of Summer (2009)*batteries not included (1987)...Zulu (2013)[REC] (2007)[REC]² (2009)[REC]³ 3 Génesis (2012)anohana: The Flower We Saw That Day - The Movie (2013)eXistenZ (1999)xXx (2002)xXx: State of the Union (2005)¡Three Amigos! (1986)À nous la liberté (Freedom for Us) (1931)
userId
10.0703450.5778550.3216960.2270550.2069580.1946150.2498830.1025420.1570840.178197...0.1136080.1817380.1339620.1285740.0061790.2120700.1929210.1360240.2929550.720347
20.0182600.0427440.0188610.0000000.0000000.0359950.0134130.0023140.0322130.014863...0.0156400.0208550.0201190.0157450.0499830.0148760.0216160.0245280.0175630.000000
30.0118840.0302790.0644370.0037620.0037490.0027220.0146250.0020850.0056660.006272...0.0069230.0116650.0118000.0122250.0000000.0081940.0070170.0092290.0104200.084501
\n

3 rows × 9719 columns

\n
def get_mse(pred, actual):
    """Mean squared error between predictions and actual ratings.

    Only positions where ``actual`` is non-zero (i.e. the user really rated
    the item) take part in the error.

    Parameters
    ----------
    pred : 2-D array of predicted ratings.
    actual : 2-D array of true ratings, same shape as ``pred``; 0 = unrated.

    Returns
    -------
    float
        MSE over the rated positions.
    """
    # Imported here so the helper does not depend on which cell ran first.
    from sklearn.metrics import mean_squared_error

    # Compute the mask once; fancy indexing already returns 1-D arrays.
    rated = actual.nonzero()
    return mean_squared_error(actual[rated], pred[rated])


def predict_rating_topsim(ratings_arr, item_sim_arr, n=20):
    """Predict ratings using only the ``n`` most similar items per target item.

    Parameters
    ----------
    ratings_arr : 2-D array, shape (n_users, n_items)
        User-by-item rating matrix (0 = unrated).
    item_sim_arr : 2-D array, shape (n_items, n_items)
        Item-to-item similarity matrix.
    n : int, default 20
        Number of nearest neighbours (by similarity) used for each item.

    Returns
    -------
    2-D float array, shape (n_users, n_items)
        Similarity-weighted average of each user's ratings over the top-``n``
        neighbours of every item. Items whose neighbours all have zero
        similarity are predicted as 0.0.
    """
    pred = np.zeros(ratings_arr.shape)

    for col in range(ratings_arr.shape[1]):
        # Indices of the n largest similarities in this item's column,
        # most similar first. Kept as a plain 1-D index array: wrapping it in
        # a list makes every lookup 2-D, which is what triggered NumPy's
        # "array with ndim > 0 to a scalar" DeprecationWarning.
        top_n_items = np.argsort(item_sim_arr[:, col])[:-n - 1:-1]

        top_sims = item_sim_arr[col, top_n_items]
        sim_norm = np.sum(np.abs(top_sims))
        if sim_norm == 0:
            # No usable neighbours: leave the column at 0 instead of 0/0.
            continue

        # One matrix-vector product fills the whole column, replacing the
        # per-user Python loop.
        pred[:, col] = ratings_arr[:, top_n_items].dot(top_sims) / sim_norm

    return pred
def get_unseen_movies(ratings_matrix, userId):
    """Return the titles a user has not rated yet.

    Parameters
    ----------
    ratings_matrix : pandas.DataFrame
        Rows are indexed by user id, columns by movie title; a value greater
        than 0 means the user rated that movie.
    userId : label
        Row label of the user in ``ratings_matrix``.

    Returns
    -------
    list
        Column labels, in column order, whose rating for ``userId`` is not
        greater than 0 (this includes NaN entries).
    """
    user_rating = ratings_matrix.loc[userId, :]

    # A set gives O(1) membership checks; scanning a list for every one of the
    # columns made this quadratic in the number of titles.
    already_seen = set(user_rating[user_rating > 0].index)

    return [movie for movie in ratings_matrix.columns.tolist() if movie not in already_seen]


def recomm_movie_by_userid(pred_df, userId, unseen_list, top_n=10):
    """Pick the ``top_n`` unseen movies with the highest predicted score.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Predicted ratings, indexed by user id with movie titles as columns.
    userId : label
        Row label of the user in ``pred_df``.
    unseen_list : list
        Column labels to choose from (typically from ``get_unseen_movies``).
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.Series
        Predicted scores indexed by title, sorted in descending order.
    """
    ranked = pred_df.loc[userId, unseen_list].sort_values(ascending=False)
    # Explicit positional slice: the first `top_n` rows of the ranking.
    return ranked.iloc[:top_n]
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
pred_score
title
Shrek (2001)0.866202
Spider-Man (2002)0.857854
Last Samurai, The (2003)0.817473
Indiana Jones and the Temple of Doom (1984)0.816626
Matrix Reloaded, The (2003)0.800990
Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)0.765159
Gladiator (2000)0.740956
Matrix, The (1999)0.732693
Pirates of the Caribbean: The Curse of the Black Pearl (2003)0.689591
Lord of the Rings: The Return of the King, The (2003)0.676711
\n
def get_rmse(R, P, Q, non_zeros):
    """RMSE between the observed ratings and their factorised predictions.

    Parameters
    ----------
    R : 2-D array, shape (n_users, n_items)
        Actual rating matrix.
    P : 2-D array, shape (n_users, K)
        User latent-factor matrix.
    Q : 2-D array, shape (n_items, K)
        Item latent-factor matrix.
    non_zeros : sequence of (row, col, rating)
        Positions of the observed ratings; only the first two fields are read.

    Returns
    -------
    float
        Root mean squared error over the positions listed in ``non_zeros``.
    """
    # Imported here so the helper does not depend on which cell ran first.
    from sklearn.metrics import mean_squared_error

    row_idx = np.array([nz[0] for nz in non_zeros], dtype=np.intp)
    col_idx = np.array([nz[1] for nz in non_zeros], dtype=np.intp)

    R_non_zeros = R[row_idx, col_idx]
    # Only the observed cells are needed, so take the row-wise dot products
    # P[i] . Q[j] directly instead of building the full P @ Q.T matrix.
    predicted_non_zeros = np.sum(P[row_idx] * Q[col_idx], axis=1)

    mse = mean_squared_error(R_non_zeros, predicted_non_zeros)
    return np.sqrt(mse)


def matrix_factorization(R, K, steps=200, learning_rate=0.01, r_lambda=0.01):
    """Factorise ``R`` into ``P @ Q.T`` with SGD and L2 regularisation.

    Parameters
    ----------
    R : 2-D numpy array, shape (n_users, n_items)
        Rating matrix. Entries that are not greater than 0 (including NaN)
        are treated as unobserved and ignored during training.
    K : int
        Number of latent factors.
    steps : int, default 200
        Number of full passes over the observed ratings.
    learning_rate : float, default 0.01
        SGD step size.
    r_lambda : float, default 0.01
        L2 regularisation strength.

    Returns
    -------
    (P, Q) : tuple of 2-D arrays
        ``P`` has shape (n_users, K) and ``Q`` has shape (n_items, K).

    Notes
    -----
    Reseeds NumPy's global RNG with 1 so the initial factors are reproducible,
    and prints the training RMSE every 10th step.
    """
    num_users, num_items = R.shape
    np.random.seed(1)
    P = np.random.normal(scale=1. / K, size=(num_users, K))
    Q = np.random.normal(scale=1. / K, size=(num_items, K))

    # Collect the observed (row, col, rating) triples in row-major order with
    # one vectorised scan rather than a Python loop over every cell of R.
    # NaN > 0 is False, so missing ratings are skipped; errstate silences the
    # "invalid value" warning some NumPy versions raise for that comparison.
    with np.errstate(invalid="ignore"):
        rows, cols = np.nonzero(R > 0)
    non_zeros = list(zip(rows.tolist(), cols.tolist(), R[rows, cols].tolist()))

    for step in range(steps):
        for i, j, r in non_zeros:
            eij = r - np.dot(P[i, :], Q[j, :].T)
            P[i, :] = P[i, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
            # Uses the P[i] that was just updated on the line above
            # (sequential update); kept as-is so results do not change.
            Q[j, :] = Q[j, :] + learning_rate * (eij * P[i, :] - r_lambda * Q[j, :])

        # RMSE is only reported every 10th step, so only compute it then.
        if (step % 10) == 0:
            rmse = get_rmse(R, P, Q, non_zeros)
            print("### iteration step : ", step, " rmse : ", rmse)

    return P, Q
step : 20 rmse : 0.5115539026853442\n", + "### iteration step : 30 rmse : 0.37261628282537446\n", + "### iteration step : 40 rmse : 0.2960818299181014\n", + "### iteration step : 50 rmse : 0.2520353192341642\n", + "### iteration step : 60 rmse : 0.22487503275269854\n", + "### iteration step : 70 rmse : 0.2068545530233154\n", + "### iteration step : 80 rmse : 0.19413418783028685\n", + "### iteration step : 90 rmse : 0.18470082002720406\n", + "### iteration step : 100 rmse : 0.17742927527209104\n", + "### iteration step : 110 rmse : 0.1716522696470749\n", + "### iteration step : 120 rmse : 0.16695181946871726\n", + "### iteration step : 130 rmse : 0.16305292191997542\n", + "### iteration step : 140 rmse : 0.15976691929679646\n", + "### iteration step : 150 rmse : 0.1569598699945732\n", + "### iteration step : 160 rmse : 0.15453398186715425\n", + "### iteration step : 170 rmse : 0.15241618551077643\n", + "### iteration step : 180 rmse : 0.1505508073962831\n", + "### iteration step : 190 rmse : 0.1488947091323209\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "ratings_pred_matrix = pd.DataFrame(data=pred_matrix, index=ratings_matrix.index, columns=ratings_matrix.columns)\n", + "ratings_pred_matrix.head(3)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 345 + }, + "id": "Nh6QFn6K4i27", + "outputId": "6554de05-f74e-4c58-9ece-fd4e7de751c1" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "title '71 (2014) 'Hellboy': The Seeds of Creation (2004) \\\n", + "userId \n", + "1 3.055084 4.092018 \n", + "2 3.170119 3.657992 \n", + "3 2.307073 1.658853 \n", + "\n", + "title 'Round Midnight (1986) 'Salem's Lot (2004) \\\n", + "userId \n", + "1 3.564130 4.502167 \n", + "2 3.308707 4.166521 \n", + "3 1.443538 2.208859 \n", + "\n", + "title 'Til There Was You (1997) 'Tis the Season for Love (2015) \\\n", + "userId \n", + "1 3.981215 1.271694 \n", + "2 4.311890 
1.275469 \n", + "3 2.229486 0.780760 \n", + "\n", + "title 'burbs, The (1989) 'night Mother (1986) (500) Days of Summer (2009) \\\n", + "userId \n", + "1 3.603274 2.333266 5.091749 \n", + "2 4.237972 1.900366 3.392859 \n", + "3 1.997043 0.924908 2.970700 \n", + "\n", + "title *batteries not included (1987) ... Zulu (2013) [REC] (2007) \\\n", + "userId ... \n", + "1 3.972454 ... 1.402608 4.208382 \n", + "2 3.647421 ... 0.973811 3.528264 \n", + "3 2.551446 ... 0.520354 1.709494 \n", + "\n", + "title [REC]² (2009) [REC]³ 3 Génesis (2012) \\\n", + "userId \n", + "1 3.705957 2.720514 \n", + "2 3.361532 2.672535 \n", + "3 2.281596 1.782833 \n", + "\n", + "title anohana: The Flower We Saw That Day - The Movie (2013) \\\n", + "userId \n", + "1 2.787331 \n", + "2 2.404456 \n", + "3 1.635173 \n", + "\n", + "title eXistenZ (1999) xXx (2002) xXx: State of the Union (2005) \\\n", + "userId \n", + "1 3.475076 3.253458 2.161087 \n", + "2 4.232789 2.911602 1.634576 \n", + "3 1.323276 2.887580 1.042618 \n", + "\n", + "title ¡Three Amigos! (1986) À nous la liberté (Freedom for Us) (1931) \n", + "userId \n", + "1 4.010495 0.859474 \n", + "2 4.135735 0.725684 \n", + "3 2.293890 0.396941 \n", + "\n", + "[3 rows x 9719 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
title'71 (2014)'Hellboy': The Seeds of Creation (2004)'Round Midnight (1986)'Salem's Lot (2004)'Til There Was You (1997)'Tis the Season for Love (2015)'burbs, The (1989)'night Mother (1986)(500) Days of Summer (2009)*batteries not included (1987)...Zulu (2013)[REC] (2007)[REC]² (2009)[REC]³ 3 Génesis (2012)anohana: The Flower We Saw That Day - The Movie (2013)eXistenZ (1999)xXx (2002)xXx: State of the Union (2005)¡Three Amigos! (1986)À nous la liberté (Freedom for Us) (1931)
userId
13.0550844.0920183.5641304.5021673.9812151.2716943.6032742.3332665.0917493.972454...1.4026084.2083823.7059572.7205142.7873313.4750763.2534582.1610874.0104950.859474
23.1701193.6579923.3087074.1665214.3118901.2754694.2379721.9003663.3928593.647421...0.9738113.5282643.3615322.6725352.4044564.2327892.9116021.6345764.1357350.725684
32.3070731.6588531.4435382.2088592.2294860.7807601.9970430.9249082.9707002.551446...0.5203541.7094942.2815961.7828331.6351731.3232762.8875801.0426182.2938900.396941
\n", + "

3 rows × 9719 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
def get_unseen_movies(ratings_matrix, userId):
    """List the movies that ``userId`` has not rated.

    Parameters
    ----------
    ratings_matrix : pandas.DataFrame
        User-by-title rating matrix; a value greater than 0 marks a movie the
        user has already rated.
    userId : label
        Row label of the user to inspect.

    Returns
    -------
    list
        Titles, in column order, whose rating for the user is not greater
        than 0 (NaN entries count as unseen).
    """
    # All ratings of the requested user as a Series indexed by movie title.
    user_rating = ratings_matrix.loc[userId, :]

    # Titles with a rating above 0 have already been watched. Stored as a set
    # so the membership test below is O(1) instead of a scan over a list.
    already_seen = set(user_rating[user_rating > 0].index)

    # Every title in the matrix, in column order.
    movies_list = ratings_matrix.columns.tolist()

    # Keep only the titles the user has not watched.
    unseen_list = [movie for movie in movies_list if movie not in already_seen]

    return unseen_list


def recomm_movie_by_userid(pred_df, userId, unseen_list, top_n=10):
    """Return the ``top_n`` unseen movies with the highest predicted rating.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Predicted rating matrix indexed by user id, with titles as columns.
    userId : label
        Row label of the user in ``pred_df``.
    unseen_list : list
        Titles the user has not watched yet.
    top_n : int, default 10
        How many recommendations to return.

    Returns
    -------
    pandas.Series
        Predicted scores indexed by title, highest first.
    """
    # Take the user's predicted scores for the unseen titles only and rank
    # them from highest to lowest.
    ranked_scores = pred_df.loc[userId, unseen_list].sort_values(ascending=False)
    recomm_movies = ranked_scores.iloc[:top_n]
    return recomm_movies
4.989601\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "unseen_list = get_unseen_movies(ratings_matrix, 9)\n", + "recomm_movies = recomm_movie_by_userid(ratings_pred_matrix, 9, unseen_list, top_n=10)\n", + "recomm_movies = pd.DataFrame(data=recomm_movies.values, index=recomm_movies.index, columns=['pred_score'])\n", + "recomm_movies" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 394 + }, + "id": "KUfQAsyS4or3", + "outputId": "78556440-c2c4-40bb-a99f-2fac2386380b" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " pred_score\n", + "title \n", + "Rear Window (1954) 5.704612\n", + "South Park: Bigger, Longer and Uncut (1999) 5.451100\n", + "Rounders (1998) 5.298393\n", + "Blade Runner (1982) 5.244951\n", + "Roger & Me (1989) 5.191962\n", + "Gattaca (1997) 5.183179\n", + "Ben-Hur (1959) 5.130463\n", + "Rosencrantz and Guildenstern Are Dead (1990) 5.087375\n", + "Big Lebowski, The (1998) 5.038690\n", + "Star Wars: Episode V - The Empire Strikes Back ... 4.989601" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pred_score
title
Rear Window (1954)5.704612
South Park: Bigger, Longer and Uncut (1999)5.451100
Rounders (1998)5.298393
Blade Runner (1982)5.244951
Roger & Me (1989)5.191962
Gattaca (1997)5.183179
Ben-Hur (1959)5.130463
Rosencrantz and Guildenstern Are Dead (1990)5.087375
Big Lebowski, The (1998)5.038690
Star Wars: Episode V - The Empire Strikes Back (1980)4.989601
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "recomm_movies", + "summary": "{\n \"name\": \"recomm_movies\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Big Lebowski, The (1998)\",\n \"South Park: Bigger, Longer and Uncut (1999)\",\n \"Gattaca (1997)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pred_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.21272885538651393,\n \"min\": 4.989601238872484,\n \"max\": 5.704612469838172,\n \"num_unique_values\": 10,\n \"samples\": [\n 5.0386897288205725,\n 5.451100205772531,\n 5.183178550884765\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 20 + } + ] + } + ] +} \ No newline at end of file