From 6c0dc2362b314e1d87d2c006b63969a7cee8ae01 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Fri, 21 Jun 2024 05:23:24 -0500
Subject: [PATCH] Add standalone finetuning and evaluation scripts for chapter
 7 (#234)

* add finetuning and eval scripts

* update link

* update links

* fix link
---
 .github/workflows/check-links.yml             |   8 +-
 .gitignore                                    |   3 +
 README.md                                     |   2 +-
 ch06/01_main-chapter-code/README.md           |   2 +-
 ...lass-finetune.py => gpt_class_finetune.py} |   0
 ch06/01_main-chapter-code/tests.py            |   2 +-
 ch07/01_main-chapter-code/README.md           |  19 ++-
 ch07/01_main-chapter-code/ch07.ipynb          |   4 +-
 ...uning.py => gpt_instruction_finetuning.py} |   3 +-
 ch07/01_main-chapter-code/ollama_evaluate.py  | 120 ++++++++++++++++++
 10 files changed, 153 insertions(+), 10 deletions(-)
 rename ch06/01_main-chapter-code/{gpt-class-finetune.py => gpt_class_finetune.py} (100%)
 rename ch07/01_main-chapter-code/{gpt-instruction-finetuning.py => gpt_instruction_finetuning.py} (99%)
 create mode 100644 ch07/01_main-chapter-code/ollama_evaluate.py

diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml
index 46fa32df..de33e520 100644
--- a/.github/workflows/check-links.yml
+++ b/.github/workflows/check-links.yml
@@ -23,8 +23,12 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest pytest-check-links pytest-retry
+          pip install pytest pytest-check-links
+          # Current version of retry doesn't work well if there are broken non-URL links
+          # pip install pytest pytest-check-links pytest-retry

       - name: Check links
         run: |
-          pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org" --retries 2 --retry-delay 5
+          pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org"
+          # pytest --check-links ./ --check-links-ignore "https://platform.openai.com/*" --check-links-ignore "https://arena.lmsys.org" --retries 2 --retry-delay 5
+
diff --git a/.gitignore b/.gitignore
index 45519021..6c118618 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 # Configs and keys
+ch07/01_main-chapter-code/instruction-data-with-response-standalone.json
+ch07/01_main-chapter-code/gpt2-medium355M-sft-standalone.pth
 ch07/02_dataset-utilities/config.json
 ch07/03_model-evaluation/config.json

@@ -17,6 +19,7 @@
 ch06/01_main-chapter-code/loss-plot.pdf
 ch06/01_main-chapter-code/accuracy-plot.pdf
 ch07/01_main-chapter-code/loss-plot.pdf
+ch07/01_main-chapter-code/loss-plot-standalone.pdf

 # Checkpoint files
 appendix-A/01_main-chapter-code/model.pth
diff --git a/README.md b/README.md
index 5fd48378..f2eceb03 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ Alternatively, you can view this and other files on GitHub at [https://github.co
 | Ch 3: Coding Attention Mechanisms | - [ch03.ipynb](ch03/01_main-chapter-code/ch03.ipynb)<br/>- [multihead-attention.ipynb](ch03/01_main-chapter-code/multihead-attention.ipynb) (summary) <br/>- [exercise-solutions.ipynb](ch03/01_main-chapter-code/exercise-solutions.ipynb)| [./ch03](./ch03) |
 | Ch 4: Implementing a GPT Model from Scratch | - [ch04.ipynb](ch04/01_main-chapter-code/ch04.ipynb)<br/>- [gpt.py](ch04/01_main-chapter-code/gpt.py) (summary)<br/>- [exercise-solutions.ipynb](ch04/01_main-chapter-code/exercise-solutions.ipynb) | [./ch04](./ch04) |
 | Ch 5: Pretraining on Unlabeled Data | - [ch05.ipynb](ch05/01_main-chapter-code/ch05.ipynb)<br/>- [gpt_train.py](ch05/01_main-chapter-code/gpt_train.py) (summary)<br/>- [gpt_generate.py](ch05/01_main-chapter-code/gpt_generate.py) (summary)<br/>- [exercise-solutions.ipynb](ch05/01_main-chapter-code/exercise-solutions.ipynb) | [./ch05](./ch05) |
-| Ch 6: Finetuning for Text Classification | - [ch06.ipynb](ch06/01_main-chapter-code/ch06.ipynb)<br/>- [gpt-class-finetune.py](ch06/01_main-chapter-code/gpt-class-finetune.py)<br/>- [exercise-solutions.ipynb](ch06/01_main-chapter-code/exercise-solutions.ipynb) | [./ch06](./ch06) |
+| Ch 6: Finetuning for Text Classification | - [ch06.ipynb](ch06/01_main-chapter-code/ch06.ipynb)<br/>- [gpt_class_finetune.py](ch06/01_main-chapter-code/gpt_class_finetune.py)<br/>- [exercise-solutions.ipynb](ch06/01_main-chapter-code/exercise-solutions.ipynb) | [./ch06](./ch06) |
 | Ch 7: Finetuning to Follow Instructions | - [ch07.ipynb](ch07/01_main-chapter-code/ch07.ipynb) | [./ch07](./ch07) |
 | Appendix A: Introduction to PyTorch | - [code-part1.ipynb](appendix-A/01_main-chapter-code/code-part1.ipynb)<br/>- [code-part2.ipynb](appendix-A/01_main-chapter-code/code-part2.ipynb)<br/>- [DDP-script.py](appendix-A/01_main-chapter-code/DDP-script.py)<br/>- [exercise-solutions.ipynb](appendix-A/01_main-chapter-code/exercise-solutions.ipynb) | [./appendix-A](./appendix-A) |
 | Appendix B: References and Further Reading | No code | - |
diff --git a/ch06/01_main-chapter-code/README.md b/ch06/01_main-chapter-code/README.md
index a5c14afb..6c21d9b4 100644
--- a/ch06/01_main-chapter-code/README.md
+++ b/ch06/01_main-chapter-code/README.md
@@ -9,5 +9,5 @@

 ### Optional Code

-- [gpt-class-finetune.py](gpt-class-finetune.py) is a standalone Python script file with the code that we implemented in [ch06.ipynb](ch06.ipynb) to finetune the GPT model (you can think of it as a chapter summary)
+- [gpt_class_finetune.py](gpt_class_finetune.py) is a standalone Python script file with the code that we implemented in [ch06.ipynb](ch06.ipynb) to finetune the GPT model (you can think of it as a chapter summary)

diff --git a/ch06/01_main-chapter-code/gpt-class-finetune.py b/ch06/01_main-chapter-code/gpt_class_finetune.py
similarity index 100%
rename from ch06/01_main-chapter-code/gpt-class-finetune.py
rename to ch06/01_main-chapter-code/gpt_class_finetune.py
diff --git a/ch06/01_main-chapter-code/tests.py b/ch06/01_main-chapter-code/tests.py
index ef747018..40ee892e 100644
--- a/ch06/01_main-chapter-code/tests.py
+++ b/ch06/01_main-chapter-code/tests.py
@@ -10,7 +10,7 @@

 def test_gpt_class_finetune():
-    command = ["python", "ch06/01_main-chapter-code/gpt-class-finetune.py", "--test_mode"]
+    command = ["python", "ch06/01_main-chapter-code/gpt_class_finetune.py", "--test_mode"]

     result = subprocess.run(command, capture_output=True, text=True)
     assert result.returncode == 0, f"Script exited with errors: {result.stderr}"
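A practical side effect of the hyphen-to-underscore renames (the patch itself does not state the motivation, so this is an inference): hyphenated file names such as `gpt-class-finetune.py` cannot be imported as Python modules, while the renamed files can be, which makes their functions reusable from tests and notebooks:

```python
# A hyphenated file name is not a valid Python identifier, so this was impossible:
#   import gpt-class-finetune   # SyntaxError
# After the rename, the script doubles as an importable module
# (assuming ch06/01_main-chapter-code is on sys.path):
import gpt_class_finetune
```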
diff --git a/ch07/01_main-chapter-code/README.md b/ch07/01_main-chapter-code/README.md
index f3f71d38..2ab6745f 100644
--- a/ch07/01_main-chapter-code/README.md
+++ b/ch07/01_main-chapter-code/README.md
@@ -10,12 +10,12 @@

 - [load-finetuned-model.ipynb](load-finetuned-model.ipynb) is a standalone Jupyter notebook to load the instruction finetuned model we created in this chapter

-- [gpt-instruction-finetuning.py](gpt-instruction-finetuning.py) is a standalone Python script to instruction finetune the model as described in the main chapter
+- [gpt_instruction_finetuning.py](gpt_instruction_finetuning.py) is a standalone Python script to instruction finetune the model as described in the main chapter (think of it as a chapter summary focused on the finetuning parts)

 Usage:

 ```bash
-python gpt-instruction-finetuning.py
+python gpt_instruction_finetuning.py
 ```

 ```
@@ -55,3 +55,18 @@ Responses saved as instruction-data-with-response-standalone.json
 Model saved as gpt2-medium355M-sft-standalone.pth
 ```
+
+- [ollama_evaluate.py](ollama_evaluate.py) is a standalone Python script to evaluate the responses of the finetuned model as described in the main chapter (think of it as a chapter summary focused on the evaluation parts)
+
+Usage:
+
+```bash
+python ollama_evaluate.py --file_path instruction-data-with-response-standalone.json
+```
+
+```
+Ollama running: True
+Scoring entries: 100%|███████████████████████████████████████| 110/110 [01:08<00:00,  1.62it/s]
+Number of scores: 110 of 110
+Average score: 51.75
+```
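Since both the chapter notebook and `ollama_evaluate.py` assume a local Ollama server, a quick connectivity check against the chat endpoint can save a failed evaluation run. The following is a minimal sketch using only the standard library; the port (11434) and model name (`llama3`) are simply the script's own defaults, not new requirements:

```python
import json
import urllib.request

# Ask Ollama's chat endpoint for a single (non-streaming) JSON reply
payload = json.dumps({
    "model": "llama3",
    "messages": [{"role": "user", "content": "Reply with the word OK."}],
    "stream": False,
}).encode("utf-8")

request = urllib.request.Request(
    "http://localhost:11434/api/chat", data=payload, method="POST"
)
request.add_header("Content-Type", "application/json")

with urllib.request.urlopen(request) as response:
    # Print the model's reply; any exception here means the server or model is unavailable
    print(json.loads(response.read())["message"]["content"])
```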
model=\"llama3\"):\n", " scores = []\n", " for entry in tqdm(json_data, desc=\"Scoring entries\"):\n", " prompt = (\n", @@ -2626,7 +2626,7 @@ " f\" on a scale from 0 to 100, where 100 is the best score. \"\n", " f\"Respond with the integer number only.\"\n", " )\n", - " score = query_model(prompt)\n", + " score = query_model(prompt, model)\n", " try:\n", " scores.append(int(score))\n", " except ValueError:\n", diff --git a/ch07/01_main-chapter-code/gpt-instruction-finetuning.py b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py similarity index 99% rename from ch07/01_main-chapter-code/gpt-instruction-finetuning.py rename to ch07/01_main-chapter-code/gpt_instruction_finetuning.py index 686ca1d9..d6644dce 100644 --- a/ch07/01_main-chapter-code/gpt-instruction-finetuning.py +++ b/ch07/01_main-chapter-code/gpt_instruction_finetuning.py @@ -259,6 +259,7 @@ def main(): optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005, weight_decay=0.1) num_epochs = 2 + torch.manual_seed(123) train_losses, val_losses, tokens_seen = train_model_simple( model, train_loader, val_loader, optimizer, device, num_epochs=num_epochs, eval_freq=5, eval_iter=5, @@ -276,7 +277,7 @@ def main(): ####################################### # Saving results ####################################### - print("Evaluating models") + print("Generating responses") for i, entry in tqdm(enumerate(test_data), total=len(test_data)): input_text = format_input(entry) diff --git a/ch07/01_main-chapter-code/ollama_evaluate.py b/ch07/01_main-chapter-code/ollama_evaluate.py new file mode 100644 index 00000000..07474a37 --- /dev/null +++ b/ch07/01_main-chapter-code/ollama_evaluate.py @@ -0,0 +1,120 @@ +# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt). +# Source for "Build a Large Language Model From Scratch" +# - https://www.manning.com/books/build-a-large-language-model-from-scratch +# Code: https://github.com/rasbt/LLMs-from-scratch +# +# A minimal instruction finetuning file based on the code in chapter 7 + +import json +import psutil +from tqdm import tqdm +import urllib.request + + +def query_model(prompt, model="llama3", url="http://localhost:11434/api/chat"): + # Create the data payload as a dictionary + data = { + "model": model, + "seed": 123, # for deterministic responses + "temperature": 0, # for deterministic responses + "messages": [ + {"role": "user", "content": prompt} + ] + } + + # Convert the dictionary to a JSON formatted string and encode it to bytes + payload = json.dumps(data).encode("utf-8") + + # Create a request object, setting the method to POST and adding necessary headers + request = urllib.request.Request(url, data=payload, method="POST") + request.add_header("Content-Type", "application/json") + + # Send the request and capture the response + response_data = "" + with urllib.request.urlopen(request) as response: + # Read and decode the response + while True: + line = response.readline().decode("utf-8") + if not line: + break + response_json = json.loads(line) + response_data += response_json["message"]["content"] + + return response_data + + +def check_if_running(process_name): + running = False + for proc in psutil.process_iter(["name"]): + if process_name in proc.info["name"]: + running = True + break + return running + + +def format_input(entry): + instruction_text = ( + f"Below is an instruction that describes a task. " + f"Write a response that appropriately completes the request." 
+ f"\n\n### Instruction:\n{entry['instruction']}" + ) + + input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else "" + + return instruction_text + input_text + + +def main(file_path): + ollama_running = check_if_running("ollama") + + if not ollama_running: + raise RuntimeError("Ollama not running. Launch ollama before proceeding.") + print("Ollama running:", check_if_running("ollama")) + + with open(file_path, "r") as file: + test_data = json.load(file) + + model = "llama3" + scores = generate_model_scores(test_data, "model_response", model) + print(f"Number of scores: {len(scores)} of {len(test_data)}") + print(f"Average score: {sum(scores)/len(scores):.2f}\n") + + +def generate_model_scores(json_data, json_key, model="llama3"): + scores = [] + for entry in tqdm(json_data, desc="Scoring entries"): + prompt = ( + f"Given the input `{format_input(entry)}` " + f"and correct output `{entry['output']}`, " + f"score the model response `{entry[json_key]}`" + f" on a scale from 0 to 100, where 100 is the best score. " + f"Respond with the integer number only." + ) + score = query_model(prompt, model) + try: + scores.append(int(score)) + except ValueError: + print(f"Could not convert score: {score}") + continue + + return scores + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser( + description="Instruction finetune a GPT model" + ) + parser.add_argument( + "--file_path", + required=True, + help=( + "The path to the test dataset `.json` file with the" + " `'output'` and `'model_response'` keys" + ) + ) + args = parser.parse_args() + + main(file_path=args.file_path)