diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml
index 46fa32df..de33e520 100644
--- a/.github/workflows/check-links.yml
+++ b/.github/workflows/check-links.yml
@@ -23,8 +23,12 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install pytest pytest-check-links pytest-retry
+ pip install pytest pytest-check-links
+          # The current version of pytest-retry doesn't work well when there are broken non-URL links
+ # pip install pytest pytest-check-links pytest-retry
- name: Check links
run: |
- pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org" --retries 2 --retry-delay 5
+ pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org"
+ # pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org" --retries 2 --retry-delay 5
+
diff --git a/.gitignore b/.gitignore
index 45519021..6c118618 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
# Configs and keys
+ch07/01_main-chapter-code/instruction-data-with-response-standalone.json
+ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
ch07/02_dataset-utilities/config.json
ch07/03_model-evaluation/config.json
@@ -17,6 +19,7 @@ ch06/01_main-chapter-code/loss-plot.pdf
ch06/01_main-chapter-code/accuracy-plot.pdf
ch07/01_main-chapter-code/loss-plot.pdf
+ch07/01_main-chapter-code/loss-plot-standalone.pdf
# Checkpoint files
appendix-A/01_main-chapter-code/model.pth
diff --git a/README.md b/README.md
index 5fd48378..f2eceb03 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ Alternatively, you can view this and other files on GitHub at [https://github.co
| Ch 3: Coding Attention Mechanisms | - [ch03.ipynb](ch03/01_main-chapter-code/ch03.ipynb)<br>- [multihead-attention.ipynb](ch03/01_main-chapter-code/multihead-attention.ipynb) (summary)<br>- [exercise-solutions.ipynb](ch03/01_main-chapter-code/exercise-solutions.ipynb)| [./ch03](./ch03) |
| Ch 4: Implementing a GPT Model from Scratch | - [ch04.ipynb](ch04/01_main-chapter-code/ch04.ipynb)<br>- [gpt.py](ch04/01_main-chapter-code/gpt.py) (summary)<br>- [exercise-solutions.ipynb](ch04/01_main-chapter-code/exercise-solutions.ipynb) | [./ch04](./ch04) |
| Ch 5: Pretraining on Unlabeled Data | - [ch05.ipynb](ch05/01_main-chapter-code/ch05.ipynb)<br>- [gpt_train.py](ch05/01_main-chapter-code/gpt_train.py) (summary)<br>- [gpt_generate.py](ch05/01_main-chapter-code/gpt_generate.py) (summary)<br>- [exercise-solutions.ipynb](ch05/01_main-chapter-code/exercise-solutions.ipynb) | [./ch05](./ch05) |
-| Ch 6: Finetuning for Text Classification | - [ch06.ipynb](ch06/01_main-chapter-code/ch06.ipynb)<br>- [gpt-class-finetune.py](ch06/01_main-chapter-code/gpt-class-finetune.py)<br>- [exercise-solutions.ipynb](ch06/01_main-chapter-code/exercise-solutions.ipynb) | [./ch06](./ch06) |
+| Ch 6: Finetuning for Text Classification | - [ch06.ipynb](ch06/01_main-chapter-code/ch06.ipynb)<br>- [gpt_class_finetune.py](ch06/01_main-chapter-code/gpt_class_finetune.py)<br>- [exercise-solutions.ipynb](ch06/01_main-chapter-code/exercise-solutions.ipynb) | [./ch06](./ch06) |
| Ch 7: Finetuning to Follow Instructions | - [ch07.ipynb](ch07/01_main-chapter-code/ch07.ipynb) | [./ch07](./ch07) |
| Appendix A: Introduction to PyTorch | - [code-part1.ipynb](appendix-A/01_main-chapter-code/code-part1.ipynb)<br>- [code-part2.ipynb](appendix-A/01_main-chapter-code/code-part2.ipynb)<br>- [DDP-script.py](appendix-A/01_main-chapter-code/DDP-script.py)<br>- [exercise-solutions.ipynb](appendix-A/01_main-chapter-code/exercise-solutions.ipynb) | [./appendix-A](./appendix-A) |
| Appendix B: References and Further Reading | No code | - |
diff --git a/ch06/01_main-chapter-code/README.md b/ch06/01_main-chapter-code/README.md
index a5c14afb..6c21d9b4 100644
--- a/ch06/01_main-chapter-code/README.md
+++ b/ch06/01_main-chapter-code/README.md
@@ -9,5 +9,5 @@
### Optional Code
-- [gpt-class-finetune.py](gpt-class-finetune.py) is a standalone Python script file with the code that we implemented in [ch06.ipynb](ch06.ipynb) to finetune the GPT model (you can think of it as a chapter summary)
+- [gpt_class_finetune.py](gpt_class_finetune.py) is a standalone Python script file with the code that we implemented in [ch06.ipynb](ch06.ipynb) to finetune the GPT model (you can think of it as a chapter summary)
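+
+Usage:
+
+```bash
+python gpt_class_finetune.py
+```
+
+(You can also pass a `--test_mode` flag for an abbreviated run, as used in [tests.py](tests.py).)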
diff --git a/ch06/01_main-chapter-code/gpt-class-finetune.py b/ch06/01_main-chapter-code/gpt_class_finetune.py
similarity index 100%
rename from ch06/01_main-chapter-code/gpt-class-finetune.py
rename to ch06/01_main-chapter-code/gpt_class_finetune.py
diff --git a/ch06/01_main-chapter-code/tests.py b/ch06/01_main-chapter-code/tests.py
index ef747018..40ee892e 100644
--- a/ch06/01_main-chapter-code/tests.py
+++ b/ch06/01_main-chapter-code/tests.py
@@ -10,7 +10,7 @@
def test_gpt_class_finetune():
- command = ["python", "ch06/01_main-chapter-code/gpt-class-finetune.py", "--test_mode"]
+ command = ["python", "ch06/01_main-chapter-code/gpt_class_finetune.py", "--test_mode"]
result = subprocess.run(command, capture_output=True, text=True)
assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
diff --git a/ch07/01_main-chapter-code/README.md b/ch07/01_main-chapter-code/README.md
index f3f71d38..2ab6745f 100644
--- a/ch07/01_main-chapter-code/README.md
+++ b/ch07/01_main-chapter-code/README.md
@@ -10,12 +10,12 @@
- [load-finetuned-model.ipynb](load-finetuned-model.ipynb) is a standalone Jupyter notebook to load the instruction finetuned model we created in this chapter
-- [gpt-instruction-finetuning.py](gpt-instruction-finetuning.py) is a standalone Python script to instruction finetune the model as described in the main chapter
+- [gpt_instruction_finetuning.py](gpt_instruction_finetuning.py) is a standalone Python script to instruction finetune the model as described in the main chapter (think of it as a chapter summary focused on the finetuning parts)
Usage:
```bash
-python gpt-instruction-finetuning.py
+python gpt_instruction_finetuning.py
```
```
@@ -55,3 +55,18 @@ Responses saved as instruction-data-with-response-standalone.json
Model saved as gpt2-medium355M-sft-standalone.pth
```
+- [ollama_evaluate.py](ollama_evaluate.py) is a standalone Python script to evaluate the responses of the finetuned model as described in the main chapter (think of it as a chapter summary focused on the evaluation parts)
+
+Usage:
+
+```bash
+python ollama_evaluate.py --file_path instruction-data-with-response-standalone.json
+```
+
+```
+Ollama running: True
+Scoring entries: 100%|███████████████████████████████████████| 110/110 [01:08<00:00, 1.62it/s]
+Number of scores: 110 of 110
+Average score: 51.75
+```
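+
+(This assumes that [Ollama](https://ollama.com) is installed and that the `llama3` model has been downloaded beforehand, for example via `ollama pull llama3`, as described in the main chapter.)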
+
diff --git a/ch07/01_main-chapter-code/ch07.ipynb b/ch07/01_main-chapter-code/ch07.ipynb
index 8f856564..59b3c968 100644
--- a/ch07/01_main-chapter-code/ch07.ipynb
+++ b/ch07/01_main-chapter-code/ch07.ipynb
@@ -2616,7 +2616,7 @@
}
],
"source": [
- "def generate_model_scores(json_data, json_key):\n",
+ "def generate_model_scores(json_data, json_key, model=\"llama3\"):\n",
" scores = []\n",
" for entry in tqdm(json_data, desc=\"Scoring entries\"):\n",
" prompt = (\n",
@@ -2626,7 +2626,7 @@
" f\" on a scale from 0 to 100, where 100 is the best score. \"\n",
" f\"Respond with the integer number only.\"\n",
" )\n",
- " score = query_model(prompt)\n",
+ " score = query_model(prompt, model)\n",
" try:\n",
" scores.append(int(score))\n",
" except ValueError:\n",
diff --git a/ch07/01_main-chapter-code/gpt-instruction-finetuning.py b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
similarity index 99%
rename from ch07/01_main-chapter-code/gpt-instruction-finetuning.py
rename to ch07/01_main-chapter-code/gpt_instruction_finetuning.py
index 686ca1d9..d6644dce 100644
--- a/ch07/01_main-chapter-code/gpt-instruction-finetuning.py
+++ b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py
@@ -259,6 +259,7 @@ def main():
optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.1)
num_epochs = 2
+ torch.manual_seed(123)
train_losses, val_losses, tokens_seen = train_model_simple(
model, train_loader, val_loader, optimizer, device,
num_epochs=num_epochs, eval_freq=5, eval_iter=5,
@@ -276,7 +277,7 @@ def main():
#######################################
# Saving results
#######################################
- print("Evaluating models")
+ print("Generating responses")
for i, entry in tqdm(enumerate(test_data), total=len(test_data)):
input_text = format_input(entry)
diff --git a/ch07/01_main-chapter-code/ollama_evaluate.py b/ch07/01_main-chapter-code/ollama_evaluate.py
new file mode 100644
index 00000000..07474a37
--- /dev/null
+++ b/ch07/01_main-chapter-code/ollama_evaluate.py
@@ -0,0 +1,120 @@
+# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
+# Source for "Build a Large Language Model From Scratch"
+# - https://www.manning.com/books/build-a-large-language-model-from-scratch
+# Code: https://github.com/rasbt/LLMs-from-scratch
+#
+# A minimal model response evaluation file based on the code in chapter 7
+
+import json
+import psutil
+from tqdm import tqdm
+import urllib.request
+
+
+def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"):
+ # Create the data payload as a dictionary
+ data = {
+ "model": model,
+ "seed": 123, # for deterministic responses
+ "temperature": 0, # for deterministic responses
+ "messages": [
+ {"role": "user", "content": prompt}
+ ]
+ }
+
+ # Convert the dictionary to a JSON formatted string and encode it to bytes
+ payload = json.dumps(data).encode("utf-8")
+
+ # Create a request object, setting the method to POST and adding necessary headers
+ request = urllib.request.Request(url, data=payload, method="POST")
+ request.add_header("Content-Type", "application/json")
+
+ # Send the request and capture the response
+ response_data = ""
+ with urllib.request.urlopen(request) as response:
+ # Read and decode the response
+ while True:
+ line = response.readline().decode("utf-8")
+ if not line:
+ break
+ response_json = json.loads(line)
+ response_data += response_json["message"]["content"]
+
+ return response_data
+
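+# Example usage (assumes a running local Ollama server and that the "llama3"
+# model has been pulled beforehand, e.g., via `ollama pull llama3`); the call
+# returns the model's reply as a single string:
+#
+# >>> query_model("Respond with the integer 42 only.")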
+
+def check_if_running(process_name):
+ running = False
+ for proc in psutil.process_iter(["name"]):
+ if process_name in proc.info["name"]:
+ running = True
+ break
+ return running
+
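+# Example: check_if_running("ollama") returns True if any running process
+# name contains the substring "ollama".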
+
+def format_input(entry):
+ instruction_text = (
+ f"Below is an instruction that describes a task. "
+ f"Write a response that appropriately completes the request."
+ f"\n\n### Instruction:\n{entry['instruction']}"
+ )
+
+ input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""
+
+ return instruction_text + input_text
+
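+# For example, an entry like {"instruction": "Convert to passive voice.",
+# "input": "The chef cooks the meal."} (a hypothetical sample) is formatted as:
+#
+#   Below is an instruction that describes a task. Write a response that
+#   appropriately completes the request.
+#
+#   ### Instruction:
+#   Convert to passive voice.
+#
+#   ### Input:
+#   The chef cooks the meal.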
+
+def main(file_path):
+ ollama_running = check_if_running("ollama")
+
+ if not ollama_running:
+ raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
+ print("Ollama running:", check_if_running("ollama"))
+
+ with open(file_path, "r") as file:
+ test_data = json.load(file)
+
+ model = "llama3"
+ scores = generate_model_scores(test_data, "model_response", model)
+ print(f"Number of scores: {len(scores)} of {len(test_data)}")
+ print(f"Average score: {sum(scores)/len(scores):.2f}\n")
+
+
+def generate_model_scores(json_data, json_key, model="llama3"):
+ scores = []
+ for entry in tqdm(json_data, desc="Scoring entries"):
+ prompt = (
+ f"Given the input `{format_input(entry)}` "
+ f"and correct output `{entry['output']}`, "
+ f"score the model response `{entry[json_key]}`"
+ f" on a scale from 0 to 100, where 100 is the best score. "
+ f"Respond with the integer number only."
+ )
+ score = query_model(prompt, model)
+ try:
+ scores.append(int(score))
+ except ValueError:
+ print(f"Could not convert score: {score}")
+ continue
+
+ return scores
+
+
+if __name__ == "__main__":
+
+ import argparse
+
+ parser = argparse.ArgumentParser(
+ description="Instruction finetune a GPT model"
+ )
+ parser.add_argument(
+ "--file_path",
+ required=True,
+ help=(
+ "The path to the test dataset `.json` file with the"
+ " `'output'` and `'model_response'` keys"
+ )
+ )
+ args = parser.parse_args()
+
+ main(file_path=args.file_path)