add exercise solutions

rasbt · rasbt · commit b39e6ef69c1e · 2024-07-09T10:38:06.000-07:00
diff --git a/02_data/02.ipynb b/02_data/02.ipynb
@@ -584,7 +584,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/03_architecture/03.ipynb b/03_architecture/03.ipynb
@@ -399,6 +399,83 @@
    "source": [
     "model.eval();  # disable dropout"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "02fa7ae0-f30d-454c-a92a-a75894ea68d2",
+   "metadata": {},
+   "source": [
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Solution"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fdc23e58-dd3f-48d8-a767-944c1b6e030f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_context = \"Hello, I am\"\n",
+    "\n",
+    "encoded = tokenizer.encode(start_context)\n",
+    "print(\"encoded:\", encoded)\n",
+    "\n",
+    "encoded_tensor = torch.tensor(encoded).unsqueeze(0)\n",
+    "print(\"encoded_tensor.shape:\", encoded_tensor.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "599b0821-9755-4cf1-8da4-a1c0fec448b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out = generate_text_simple(\n",
+    "    model=model,\n",
+    "    idx=encoded_tensor, \n",
+    "    max_new_tokens=6, \n",
+    "    context_size=GPT_CONFIG_124M[\"context_length\"]\n",
+    ")\n",
+    "\n",
+    "print(\"Output:\", out)\n",
+    "print(\"Output length:\", len(out[0]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6de4f875-f967-4089-8410-b5cd2c200de8",
+   "metadata": {},
+   "source": [
+    "- Remove the batch dimension and convert the token IDs back into text:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74c8d848-8ac1-41d4-b229-72ba7698297c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "decoded_text = tokenizer.decode(out.squeeze(0).tolist())\n",
+    "print(decoded_text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c538bcd-a209-4273-9527-60d6fef1f6ab",
+   "metadata": {},
+   "source": [
+    "- Note that the model is untrained, which is why the generated text above is random\n",
+    "- We will train the model in the next notebook"
+   ]
   }
  ],
  "metadata": {
@@ -417,7 +494,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/04_pretraining/04.ipynb b/04_pretraining/04.ipynb
@@ -695,6 +695,81 @@
     "\n",
     "# Exercise 3 (Optional): Train the LLM on your own favorite texts"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "11f349d5-35e4-4502-8b86-ab57b5ca2f0c",
+   "metadata": {},
+   "source": [
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "\n",
+    "\n",
+    "# Solution to Exercise 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f564c82a-49f7-46da-ad78-b9cb846eb5e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_context = \"Every effort moves you\"\n",
+    "tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
+    "\n",
+    "token_ids = generate_text_simple(\n",
+    "    model=model,\n",
+    "    idx=text_to_token_ids(start_context, tokenizer),\n",
+    "    max_new_tokens=10,\n",
+    "    context_size=GPT_CONFIG_124M[\"context_length\"]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e9d58e1-afba-44c7-9f82-7516adff359d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Output text:\\n\", token_ids_to_text(token_ids, tokenizer))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b64b3b1f-c8d3-4755-a926-dc86eeae0ba0",
+   "metadata": {},
+   "source": [
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "\n",
+    "\n",
+    "# Solution to Exercise 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a998656c-3615-4673-a9f9-c8eefb6b6611",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# Imports from a local file\n",
+    "from supplementary import GPTModel\n",
+    "\n",
+    "\n",
+    "model = GPTModel(GPT_CONFIG_124M)\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "model.load_state_dict(torch.load(\"model.pth\", map_location=device))\n",
+    "model.eval();"
+   ]
   }
  ],
  "metadata": {
@@ -719,7 +794,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/05_weightloading/05_part-1.ipynb b/05_weightloading/05_part-1.ipynb
@@ -407,7 +407,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/06_finetuning/06_part-1.ipynb b/06_finetuning/06_part-1.ipynb
@@ -389,7 +389,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/06_finetuning/06_part-2.ipynb b/06_finetuning/06_part-2.ipynb
@@ -369,6 +369,19 @@
     "- Save the resulting `test_data` dictionary as `test_base_and_finetuned_model.json`"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "fbdeb748-e3a9-4671-ae9b-046e6f298044",
+   "metadata": {},
+   "source": [
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "\n",
+    "# Solution"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -379,7 +392,7 @@
     "from litgpt import LLM\n",
     "\n",
     "del llm\n",
-    "llm2 = LLM.load(\"/teamspace/studios/this_studio/out/finetune/lora/final/\")"
+    "llm2 = LLM.load(\"out/finetune/lora/final/\")"
    ]
   },
   {
@@ -418,7 +431,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/06_finetuning/06_part-3.ipynb b/06_finetuning/06_part-3.ipynb
@@ -137,6 +137,64 @@
     "\n",
     "# Exercise 3: Evaluate the finetuned LLM"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "773a25be-5a02-477b-bfea-ffd53e44647b",
+   "metadata": {},
+   "source": [
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "<br>\n",
+    "\n",
+    "# Solution"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "6cd718c4-0e83-4a83-84f8-59e3fc4c3404",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'batch_size': 4,\n",
+      " 'checkpoint_dir': PosixPath('out/finetune/lora/final'),\n",
+      " 'device': None,\n",
+      " 'dtype': None,\n",
+      " 'force_conversion': False,\n",
+      " 'limit': None,\n",
+      " 'num_fewshot': None,\n",
+      " 'out_dir': None,\n",
+      " 'save_filepath': None,\n",
+      " 'seed': 1234,\n",
+      " 'tasks': 'mmlu_philosophy'}\n",
+      "2024-07-04:00:57:13,332 INFO     [huggingface.py:170] Using device 'cuda'\n",
+      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+      "2024-07-04:00:57:18,981 INFO     [evaluator.py:152] Setting random seed to 1234 | Setting numpy seed to 1234 | Setting torch manual seed to 1234\n",
+      "2024-07-04:00:57:18,981 INFO     [evaluator.py:203] Using pre-initialized model\n",
+      "2024-07-04:00:57:24,808 INFO     [evaluator.py:261] Setting fewshot random generator seed to 1234\n",
+      "2024-07-04:00:57:24,809 INFO     [task.py:411] Building contexts for mmlu_philosophy on rank 0...\n",
+      "100%|████████████████████████████████████████| 311/311 [00:00<00:00, 807.98it/s]\n",
+      "2024-07-04:00:57:25,206 INFO     [evaluator.py:438] Running loglikelihood requests\n",
+      "Running loglikelihood requests:   0%|                  | 0/1244 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n",
+      "Running loglikelihood requests: 100%|██████| 1244/1244 [00:07<00:00, 158.49it/s]\n",
+      "2024-07-04:00:57:33,515 WARNING  [huggingface.py:1315] Failed to get model SHA for /teamspace/studios/this_studio/out/finetune/lora/final/evaluate at revision main. Error: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/teamspace/studios/this_studio/out/finetune/lora/final/evaluate'. Use `repo_type` argument if needed.\n",
+      "fatal: not a git repository (or any parent up to mount point /teamspace/studios)\n",
+      "Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\n",
+      "|  Tasks   |Version|Filter|n-shot|Metric|   |Value |   |Stderr|\n",
+      "|----------|------:|------|-----:|------|---|-----:|---|-----:|\n",
+      "|philosophy|      0|none  |     0|acc   |↑  |0.5691|±  |0.0281|\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "!litgpt evaluate out/finetune/lora/final --tasks \"mmlu_philosophy\" --batch_size 4"
+   ]
   }
  ],
  "metadata": {
@@ -160,7 +218,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.10"
   }
  },
  "nbformat": 4,
diff --git a/06_finetuning/06_part-4.ipynb b/06_finetuning/06_part-4.ipynb

Original file line number	Diff line number	Diff line change
`@@ -584,7 +584,7 @@`
`584`	`584`	`"name": "python",`
`585`	`585`	`"nbconvert_exporter": "python",`
`586`	`586`	`"pygments_lexer": "ipython3",`
`587`		`- "version": "3.10.6"`
	`587`	`+ "version": "3.10.10"`
`588`	`588`	`}`
`589`	`589`	`},`
`590`	`590`	`"nbformat": 4,`
Original file line number	Diff line number	Diff line change
`@@ -407,7 +407,7 @@`
`407`	`407`	`"name": "python",`
`408`	`408`	`"nbconvert_exporter": "python",`
`409`	`409`	`"pygments_lexer": "ipython3",`
`410`		`- "version": "3.10.6"`
	`410`	`+ "version": "3.10.10"`
`411`	`411`	`}`
`412`	`412`	`},`
`413`	`413`	`"nbformat": 4,`
Original file line number	Diff line number	Diff line change
`@@ -389,7 +389,7 @@`
`389`	`389`	`"name": "python",`
`390`	`390`	`"nbconvert_exporter": "python",`
`391`	`391`	`"pygments_lexer": "ipython3",`
`392`		`- "version": "3.10.6"`
	`392`	`+ "version": "3.10.10"`
`393`	`393`	`}`
`394`	`394`	`},`
`395`	`395`	`"nbformat": 4,`