diff --git "a/Week15_\353\263\265\354\212\265\352\263\274\354\240\234_\354\235\264\354\204\234\354\225\204.ipynb" "b/Week15_\353\263\265\354\212\265\352\263\274\354\240\234_\354\235\264\354\204\234\354\225\204.ipynb" new file mode 100644 index 0000000..c242c4b --- /dev/null +++ "b/Week15_\353\263\265\354\212\265\352\263\274\354\240\234_\354\235\264\354\204\234\354\225\204.ipynb" @@ -0,0 +1,1115 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "f3cff5ef87284e0f930b072944e503bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_25118ed75a174274a61441a3924730b4", + "IPY_MODEL_6f71aa69d211437285ea91ba321a3cd2", + "IPY_MODEL_382e9f567e134b1883392fa8410c453a" + ], + "layout": "IPY_MODEL_bbada3e281e345fc9cee2ac721fc98d5" + } + }, + "25118ed75a174274a61441a3924730b4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b6420aa2b5c4b3394d6e2e7b09e12fd", + "placeholder": "​", + "style": 
"IPY_MODEL_9a462bcee0ae4fc2aeab90d138722a1b", + "value": "Map: 100%" + } + }, + "6f71aa69d211437285ea91ba321a3cd2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_19d937139d5743649dd4994a26b837d3", + "max": 872, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1dd1ce770da3451e94c64f1ade7f7f9a", + "value": 872 + } + }, + "382e9f567e134b1883392fa8410c453a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ef8e7b360024b7fbd23838a0c2da9ac", + "placeholder": "​", + "style": "IPY_MODEL_811e4748eed5449c90684aed9372cda9", + "value": " 872/872 [00:00<00:00, 3913.04 examples/s]" + } + }, + "bbada3e281e345fc9cee2ac721fc98d5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": 
null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b6420aa2b5c4b3394d6e2e7b09e12fd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"9a462bcee0ae4fc2aeab90d138722a1b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "19d937139d5743649dd4994a26b837d3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1dd1ce770da3451e94c64f1ade7f7f9a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8ef8e7b360024b7fbd23838a0c2da9ac": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "811e4748eed5449c90684aed9372cda9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "38aa48585df24862995017cced76c85e": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1d2766f1d5384a4daf2e8a8ff7f8e6c6", + "IPY_MODEL_8cc4cfcecfe84982afff4b4949eda172", + "IPY_MODEL_98db4cf399334dfcaa862f5010e826f2" + ], + "layout": "IPY_MODEL_cb41186038f74475a191d85ff778d6bb" + } + }, + "1d2766f1d5384a4daf2e8a8ff7f8e6c6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6fc354f5bbad47fd8f920631726d6b9a", + "placeholder": "​", + "style": "IPY_MODEL_5f209d2b60d64ee79be12fcccf7f313c", + "value": "model.safetensors: 100%" + } + }, + "8cc4cfcecfe84982afff4b4949eda172": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_360a8b3c1e7e4ac6ad556ca103f8e877", + "max": 440449768, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_1b600acb298b4a48a4247735dc21ce64", + "value": 440449768 + } + }, + "98db4cf399334dfcaa862f5010e826f2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f3b1ec23c75c4f998048b2e377d07df1", + "placeholder": "​", + "style": "IPY_MODEL_573e3cfd02514746870a85186baf5cb0", + "value": " 440M/440M [00:06<00:00, 96.2MB/s]" + } + }, + "cb41186038f74475a191d85ff778d6bb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, 
+ "6fc354f5bbad47fd8f920631726d6b9a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f209d2b60d64ee79be12fcccf7f313c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "360a8b3c1e7e4ac6ad556ca103f8e877": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b600acb298b4a48a4247735dc21ce64": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f3b1ec23c75c4f998048b2e377d07df1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "573e3cfd02514746870a85186baf5cb0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "id": "4644be45", + "cell_type": "markdown", + "source": [ + "# BERT와 ELECTRA 모델 비교 실습\n", + "\n", + "- 이번 복습과제에서는 SST-2 데이터셋을 기반으로 BERT와 ELECTRA 모델을 학습시켜보고 성능과 구조의 차이를 알아보겠습니다.\n", + "- 코드 실행시간이 매우 길 수 있습니다.\n", + " - 최대한 끝까지 실행해보시되, 시간 부족으로 인해 중간에 중지하신 실행 결과를 제출하셔도 괜찮습니다.\n", + " - 제출 이후에는 꼭 끝까지 실행시켜 비교해보시기 바랍니다!" 
+ ], + "metadata": { + "id": "4644be45" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install --upgrade --quiet datasets fsspec huggingface_hub" + ], + "metadata": { + "id": "2eSXxqg4a9SF", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5a95390e-19d2-44a3-cba1-d98750e9a941" + }, + "id": "2eSXxqg4a9SF", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/512.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m512.3/512.3 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/201.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.0/201.0 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m521.0/521.0 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.7/47.7 MB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "transformers 4.57.3 requires huggingface-hub<1.0,>=0.34.0, but you have huggingface-hub 1.2.3 which is incompatible.\n", + "gcsfs 2025.3.0 requires fsspec==2025.3.0, but you have fsspec 2025.10.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "---------------\n", + "여기까지만 실행\n", + "---------------\n", + "그 다음, 런타임 > 세션 다시 시작 > 아래 셀부터 실행" + ], + "metadata": { + "id": "4rq77NfBbByn" + }, + "id": "4rq77NfBbByn" + }, + { + "cell_type": "code", + "source": [ + "!pip install -U transformers huggingface-hub==0.34.0 datasets" + ], + "metadata": { + "id": "rQP4VIruVqk5" + }, + "id": "rQP4VIruVqk5", + "execution_count": null, + "outputs": [] + }, + { + "id": "ac52f320", + "cell_type": "code", + "metadata": { + "id": "ac52f320", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "025b2e7d-8a76-44fc-f805-eb8e02c9cfd1" + }, + "execution_count": 2, + "source": [ + "!pip install huggingface-hub==0.34.0 --quiet" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/558.7 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m558.7/558.7 kB\u001b[0m \u001b[31m20.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "from torch.utils.data import DataLoader\n", + "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", + "from datasets import load_dataset\n", + "from torch.optim import AdamW\n", + "from tqdm import tqdm" + ], + "metadata": { + "id": "i-nahP67XVPr" + }, + "id": "i-nahP67XVPr", + "execution_count": 2, + "outputs": [] + }, + { + "id": "393f4136", + "cell_type": "code", + "metadata": 
{ + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "393f4136", + "outputId": "db9b29f9-1cba-434c-9912-4046073b4ba0" + }, + "execution_count": 3, + "source": [ + "# batch_size와 epochs를 조정해보세요!\n", + "batch_size = 16\n", + "epochs = 2\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "print(f\"Using device: {device}\")" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using device: cuda\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 데이터셋 로드\n", + "raw_datasets = load_dataset(\"sst2\")\n", + "raw_datasets" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QTVKkGiIflzk", + "outputId": "05c03818-1d8f-49b7-96f3-28585234e900" + }, + "id": "QTVKkGiIflzk", + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['idx', 'sentence', 'label'],\n", + " num_rows: 67349\n", + " })\n", + " validation: Dataset({\n", + " features: ['idx', 'sentence', 'label'],\n", + " num_rows: 872\n", + " })\n", + " test: Dataset({\n", + " features: ['idx', 'sentence', 'label'],\n", + " num_rows: 1821\n", + " })\n", + "})" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ 
+ "# 전처리\n", + "def tokenize_function(examples, tokenizer):\n", + " return tokenizer(examples[\"sentence\"], padding=\"max_length\", truncation=True, max_length=128)" + ], + "metadata": { + "id": "Y_zJtt34ftoZ" + }, + "id": "Y_zJtt34ftoZ", + "execution_count": 5, + "outputs": [] + }, + { + "id": "4e3b88e4", + "cell_type": "markdown", + "source": [ + "## 🔹 BERT와 ELECTRA 실험" + ], + "metadata": { + "id": "4e3b88e4" + } + }, + { + "cell_type": "code", + "source": [ + "# 학습 함수 정의\n", + "def train_and_evaluate(model_name):\n", + " print(f\"\\n======== Now Training: {model_name} ========\")\n", + "\n", + " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + " tokenized_datasets = raw_datasets.map(lambda x: tokenize_function(x, tokenizer), batched=True)\n", + "\n", + " tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", + " tokenized_datasets.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n", + " train_dataset = tokenized_datasets[\"train\"]\n", + " valid_dataset = tokenized_datasets[\"validation\"]\n", + "\n", + " train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n", + " valid_loader = DataLoader(valid_dataset, batch_size=batch_size)\n", + "\n", + " model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)\n", + " optimizer = AdamW(model.parameters(), lr=2e-5)\n", + "\n", + " model.train()\n", + " for epoch in range(epochs):\n", + " total_loss = 0\n", + " for batch in tqdm(train_loader, desc=f\"Epoch {epoch+1}\"):\n", + " batch = {k: v.to(device) for k, v in batch.items()}\n", + " outputs = model(\n", + " input_ids=batch[\"input_ids\"],\n", + " attention_mask=batch[\"attention_mask\"],\n", + " labels=batch[\"labels\"],\n", + " )\n", + " loss = outputs.loss\n", + " total_loss += loss.item()\n", + "\n", + " loss.backward()\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + "\n", + " avg_loss = total_loss/ len(train_loader)\n", + " 
print(f\"Epoch {epoch+1} - Avg Train Loss: {avg_loss:.4f}\")\n", + "\n", + " model.eval()\n", + " correct = 0\n", + " total = 0\n", + " with torch.no_grad():\n", + " for batch in valid_loader:\n", + " batch = {k: v.to(device) for k, v in batch.items()}\n", + " outputs = model(**batch)\n", + " predictions = torch.argmax(outputs.logits, dim=-1)\n", + " correct += (predictions == batch[\"labels\"]).sum().item()\n", + " total += batch[\"labels\"].size(0)\n", + "\n", + " acc = correct / total\n", + " print(f\"Validation Accuracy ({model_name}): {acc:.4f}\")\n", + " return acc\n", + "\n", + "# 실행 및 평가\n", + "bert_acc = train_and_evaluate(\"bert-base-uncased\")\n", + "electra_acc = train_and_evaluate(\"google/electra-base-discriminator\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 232, + "referenced_widgets": [ + "f3cff5ef87284e0f930b072944e503bf", + "25118ed75a174274a61441a3924730b4", + "6f71aa69d211437285ea91ba321a3cd2", + "382e9f567e134b1883392fa8410c453a", + "bbada3e281e345fc9cee2ac721fc98d5", + "1b6420aa2b5c4b3394d6e2e7b09e12fd", + "9a462bcee0ae4fc2aeab90d138722a1b", + "19d937139d5743649dd4994a26b837d3", + "1dd1ce770da3451e94c64f1ade7f7f9a", + "8ef8e7b360024b7fbd23838a0c2da9ac", + "811e4748eed5449c90684aed9372cda9", + "38aa48585df24862995017cced76c85e", + "1d2766f1d5384a4daf2e8a8ff7f8e6c6", + "8cc4cfcecfe84982afff4b4949eda172", + "98db4cf399334dfcaa862f5010e826f2", + "cb41186038f74475a191d85ff778d6bb", + "6fc354f5bbad47fd8f920631726d6b9a", + "5f209d2b60d64ee79be12fcccf7f313c", + "360a8b3c1e7e4ac6ad556ca103f8e877", + "1b600acb298b4a48a4247735dc21ce64", + "f3b1ec23c75c4f998048b2e377d07df1", + "573e3cfd02514746870a85186baf5cb0" + ] + }, + "id": "_ehi3D9Mf9ov", + "outputId": "404408cc-df9f-4725-f6f1-1669b85ee6c7" + }, + "id": "_ehi3D9Mf9ov", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "======== Now Training: bert-base-uncased ========\n" + ] + }, + { + 
"output_type": "display_data", + "data": { + "text/plain": [ + "Map: 0%| | 0/872 [00:00