diff --git a/.github/actions/ollama/action.yml b/.github/actions/ollama/action.yml
new file mode 100644
index 000000000..47e6d7094
--- /dev/null
+++ b/.github/actions/ollama/action.yml
@@ -0,0 +1,55 @@
+name: 'Ollama Setup'
+description: 'Composite action for Ollama setup in the GitHub Actions environment'
+runs:
+  using: 'composite'
+  steps:
+    # Free up some disk space on the runner
+    - name: Remove unnecessary files
+      shell: bash
+      run: |
+        sudo rm -rf /usr/share/dotnet
+        sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+    # Set up Ollama
+    - name: Install Ollama and start server
+      shell: bash
+      run: |
+        curl -fsSL https://ollama.com/install.sh | sudo -E sh
+
+    # `ollama pull` talks to the local server, so wait for it before pulling.
+    - name: Wait for Ollama server
+      shell: bash
+      run: |
+        # Retry instead of a fixed sleep: fail only if the server never answers.
+        curl --fail --silent --show-error \
+          --retry 10 --retry-delay 2 --retry-connrefused \
+          http://localhost:11434
+
+    - name: Pull models in examples/
+      shell: bash
+      run: |
+        ollama pull granite3.2:2b
+        ollama pull granite3.2:8b
+        ollama pull mxbai-embed-large
+        ollama list
+
+    - name: Check that all required models are available
+      shell: bash
+      run: |
+        models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
+        # List once: skip the first (header) row and keep the first column.
+        available="$(ollama list | awk 'NR>1 {print $1}')"
+        missing=0
+        for model in "${models[@]}"; do
+          # -F: match the name literally ('.' is not a regex wildcard).
+          if ! grep -Fq -- "$model" <<<"$available"; then
+            echo "❌ Model $model is missing!"
+            missing=1
+          fi
+        done
+
+        if [ "$missing" -eq 1 ]; then
+          exit 1
+        else
+          echo "✅ All expected models are available."
+        fi
diff --git a/.github/actions/run-examples/action.yml b/.github/actions/run-examples/action.yml
new file mode 100644
index 000000000..9dc8205bb
--- /dev/null
+++ b/.github/actions/run-examples/action.yml
@@ -0,0 +1,115 @@
+name: 'Run Examples'
+description: 'Composite action that sets up and runs the Run Examples tests'
+inputs:
+  python-version:
+    description: 'Python version'
+    required: true
+    default: '3.11'
+  runner-os:
+    description: 'Runner OS'
+    required: true
+  repository:
+    description: 'Repository name this pull request is initiated from'
+    required: false
+  head-ref:
+    description: 'Head ref of the repo'
+    required: false
+    default: 'main'
+  token:
+    description: 'GitHub token'
+    required: false
+  update-results:
+    description: 'Whether to update the results for this run. Must be false for nightly runs'
+    required: true
+  check:
+    description: 'Files to patch tests/test_examples_run.yaml with. These are the PDL files that the test will run against. Defaults to all PDL files.'
+    required: false
+    default: '[]'
+runs:
+  using: 'composite'
+  steps:
+    # Set up Ollama
+    - uses: ./.github/actions/ollama
+
+    # Configure Run Examples environment
+    - uses: actions/checkout@v4
+      with:
+        token: ${{ inputs.token }}
+        ref: ${{ inputs.head-ref }}
+        repository: ${{ inputs.repository }}
+        fetch-depth: 0
+
+    - name: Patch tests/test_examples_run.yaml check with modified files
+      shell: bash
+      env:
+        # Passed via env rather than interpolated into the script: the list can be
+        # built from PR file names and must not be parsed as shell or yq code.
+        CHECK_FILES: ${{ inputs.check }}
+      run: |
+        # env() parses the value as YAML, so a JSON array becomes a sequence.
+        yq -i '.check = env(CHECK_FILES)' tests/test_examples_run.yaml
+
+    - name: Print test Run Examples config
+      shell: bash
+      run: cat tests/test_examples_run.yaml
+
+    # Run tests
+    - name: Set up Python ${{ inputs.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ inputs.python-version }}
+    - name: Cache pip
+      uses: actions/cache@v4
+      with:
+        # This path is specific to Ubuntu
+        path: ${{ env.pythonLocation }}
+        # Look to see if there is a cache hit for the setup file
+        key: ${{ inputs.runner-os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
+        restore-keys: |
+          ${{ inputs.runner-os }}-pip-new3
+          ${{ inputs.runner-os }}-new3
+    - name: Install dependencies
+      shell: bash
+      run: pip install --upgrade --upgrade-strategy eager .[all]
+    - name: Pip list packages
+      shell: bash
+      run: pip list
+    - name: Run Pytest
+      shell: bash
+      run: |
+        cat tests/test_examples_run.yaml
+        # Record the outcome instead of failing here so the result-push step
+        # can still run; the last step turns TEST_RESULT into the exit code.
+        if py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py --disable-pytest-warnings; then
+          echo "TEST_RESULT=PASSED" >> "$GITHUB_ENV"
+        else
+          echo "TEST_RESULT=FAILED" >> "$GITHUB_ENV"
+        fi
+
+    # Commit the results if update results
+    - name: Push new results to branch
+      shell: bash
+      if: ${{ inputs.update-results == 'true' }}
+      env:
+        # Branch/repository names are caller-supplied; keep them (and the token)
+        # out of the script text so they cannot inject shell commands.
+        HEAD_REF: ${{ inputs.head-ref }}
+        REPOSITORY: ${{ inputs.repository }}
+        TOKEN: ${{ inputs.token }}
+      run: |
+        git config --local user.name 'github-actions[bot]'
+        git config --local user.email '41898282+github-actions[bot]@users.noreply.github.com'
+        git status
+        git pull origin "$HEAD_REF"
+        git add tests/results/
+        git diff --cached --quiet || git commit -s -m "github-actions[bot]: Run examples: updated result files on your behalf"
+        git push "https://x-access-token:${TOKEN}@github.com/${REPOSITORY}" "HEAD:${HEAD_REF}"
+
+    - name: Check if pytest passed
+      shell: bash
+      run: |
+        if [ "$TEST_RESULT" = "PASSED" ]; then
+          exit 0
+        else
+          exit 1
+        fi
diff --git a/.github/workflows/run-examples-prep.yml b/.github/workflows/run-examples-prep.yml
new file mode 100644
index 000000000..9ea984412
--- /dev/null
+++ b/.github/workflows/run-examples-prep.yml
@@ -0,0 +1,52 @@
+---
+name: Run examples on modified PDL files
+on: [pull_request]
+jobs:
+  tests:
+    name: Execution tests
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ['3.11', '3.12', '3.13']
+
+    steps:
+    # Detect modified PDL files, includes Add, Modified, but not Deleted
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    - name: Detect all PDL files that were changed or added
+      id: changed-pdl-files
+      uses: tj-actions/changed-files@6cb76d07bee4c9772c6882c06c37837bf82a04d3 # v46
+      with:
+        files: |
+          **.pdl
+        json: "true"
+        escape_json: "false"
+    - name: List PDL files that were modified or added and append to test_examples_run.yaml
+      env:
+        # File names come from the pull request, so hand them to the shell as
+        # data instead of interpolating them into the script.
+        CHANGED_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+        CHANGED_COUNT: ${{ steps.changed-pdl-files.outputs.all_changed_files_count }}
+      run: |
+        echo "$CHANGED_FILES"
+        if [ "${CHANGED_COUNT:-0}" -gt 0 ]; then
+          echo "early-stop=false" >> "$GITHUB_ENV"
+        else
+          echo "No file need to be checked, skipping all subsequent tests."
+          echo "early-stop=true" >> "$GITHUB_ENV"
+        fi
+
+    - name: Run examples
+      uses: ./.github/actions/run-examples
+      # Only run if there are modified PDL files
+      if: env.early-stop == 'false'
+      with:
+        python-version: ${{ matrix.python-version }}
+        runner-os: ${{ runner.os }}
+        repository: ${{ github.event.pull_request.head.repo.full_name }}
+        head-ref: ${{ github.event.pull_request.head.ref }}
+        token: ${{ github.token }}
+        update-results: 'false'
+        check: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
diff --git a/.github/workflows/run-examples.yml b/.github/workflows/run-examples.yml
index f3e75674b..abc7fd933 100644
--- a/.github/workflows/run-examples.yml
+++ b/.github/workflows/run-examples.yml
@@ -6,7 +6,6 @@ on:
     - cron:  '0 1 * * *'
   workflow_dispatch:
 
-
 jobs:
   tests:
     name: Execution tests
@@ -15,93 +14,17 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ['3.11', '3.12', '3.13']
-
     steps:
-
-      # Free up some disk space
-    - name: Remove unnecessary files
-      run: |
-        sudo rm -rf /usr/share/dotnet
-        sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-
-      # Set up Ollama
-    - name: Install Ollama and start server
-      shell: bash
-      run: |
-        curl -fsSL https://ollama.com/install.sh | sudo -E sh
-
-    - name: Pull models in examples/
-      shell: bash
-      run: |
-        ollama pull granite3.2:2b
-        ollama pull granite3.2:8b
-        ollama pull mxbai-embed-large
-        ollama list
-
-    - name: Check that all required models are available
-      shell: bash
-      run: |
-        models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
-        missing=0
-        for model in "${models[@]}"; do
-          if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
-            echo "❌ Model $model (or substring) is missing!"
-            missing=1
-          fi
-        done
-
-        if [ "$missing" -eq 1 ]; then
-          exit 1
-        else
-          echo "✅ All expected models are available."
-        fi
-
-    - name: Wait for Ollama server
-      shell: bash
-      run: |
-        sleep 10
-        time curl -i http://localhost:11434
-
-    # Run tests
-    - uses: actions/checkout@v4
-      with:
-        ref: ${{ github.head_ref }}
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Cache pip
-      uses: actions/cache@v4
-      with:
-        # This path is specific to Ubuntu
-        path: ${{ env.pythonLocation }}
-        # Look to see if there is a cache hit for the setup file
-        key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-new3
-          ${{ runner.os }}-new3
-    - name: Install dependencies
-      run: pip install --upgrade --upgrade-strategy eager .[all]
-    - name: pip list packages
-      run: pip list
-    - name: show pip dependencies
-      run: |
-        pip install pipdeptree
-        pipdeptree -fl
-    - name: run tests
-      env:
-        WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
-        WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
-        WATSONX_URL: ${{ secrets.WATSONX_URL }}
-        REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
-        OLLAMA_GHACTIONS_RESULTS: true
-      run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
-    - name: Update example result files (if any) generated from Ollama running on GH Actions
-      if: matrix.python-version == '3.11'
-      run: |
-        git config --local user.name github-actions[bot]
-        git config --local user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com"
-        git status
-        git add tests/results/
-        git diff --cached --quiet || git commit -S -s -m "github-actions[bot]: Updated results file when running examples on $(date)"
-        git push
\ No newline at end of file
+      # head_ref is only set for pull_request events; fall back to the triggering branch.
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.head_ref || github.ref_name }}
+      - uses: ./.github/actions/run-examples
+        with:
+          python-version: ${{ matrix.python-version }}
+          runner-os: ${{ runner.os }}
+          repository: ${{ github.repository }}
+          head-ref: ${{ github.head_ref || github.ref_name }}
+          token: ${{ github.token }}
+          update-results: 'false'
+          check: '[]'  # Empty list means run against all PDL programs
diff --git a/docs/contrib.md b/docs/contrib.md
index e4995b0ea..f98f69d93 100644
--- a/docs/contrib.md
+++ b/docs/contrib.md
@@ -48,10 +48,10 @@ Follow the following instructions to set up a dev environment to get started wit
 4. Test that you can run an editable version of PDL
 
     ```
-    pdl examples/hello/hello.pdl
+    pdl examples/demo/1-hello.pdl
 
     Hello
-    Hello! How can I help you today?
+    Hello
     ```
 
 ### Documentation updates
@@ -61,8 +61,7 @@ When you make changes to PDL, ensure to document any new features in the docs se
 Install the required dependencies for documentation.
 
 ```
-pip install mkdocs-get-deps
-pip install $(mkdocs-get-deps)
+pip install -e .[docs]
 ```
 
 Then serve the docs to load a preview.
@@ -71,4 +70,70 @@ Then serve the docs to load a preview.
 mkdocs serve
 ```
 
-You are all set!
\ No newline at end of file
+You are all set!
+
+### Run examples
+
+PDL executes nightly runs for Run Examples, which searches for all the `.pdl` programs in the repo and runs the interpreter against each file. The [config file for Run Examples](../tests/test_examples_run.yaml) describes how to handle each file. There are four conditions:
+
+1. `skip`: a list of PDL files that are skipped in Run Examples
+2. `with_inputs`: PDL files that require user input. Each file name is mapped to two fields that describe how inputs are patched
+   1. `stdin`: separated by a newline, each line represents the user input (string)
+   2. `scope`: scope for the PDL program (a dictionary of key-value pairs)
+3. `expected_parse_error`: a list of PDL files that expect parse errors 
+4. `expected_runtime_error`: a list of PDL files that expect runtime errors
+   
+If you wish to make a contribution to PDL and modify or add any PDL program to the repo, it is important that you provide the new expected results for those files so that the Run Examples nightly test does not break. 
+
+#### Local dev
+
+Under `check`, you can provide a list of files that you want to run Pytest against. If you leave it empty (`check: ` or `check: []`), then by default, Pytest will be executed against all files in the repo, except for those under `skip`. For local development, it is useful to only test against a subset of files so that Pytest executes faster.
+
+If you expect the files to produce a different result, setting `update_results: true` will automatically create a new file under `tests/results` capturing the new output for each of the files in `check`. It is useful to set this field to `true` before opening a PR.
+
+Run this Pytest command for Run Examples, which is the same command for the nightly test.
+
+```
+pytest --capture=tee-sys -rfE -s tests/test_examples_run.py --disable-pytest-warnings
+```
+
+#### Opening a pull request
+
+A slight variation in the Python version and OS environment can cause a different LLM response, thus Run Examples might fail because it uses exact string matching for PDL outputs.
+
+When you open a pull request (PR) against the `main` branch, a series of status checks will be executed. Specifically, three Run Examples tests will be initiated against the PDL files you have added and modified as part of the PR. If there's any variation, you should manually examine the results produced in the GitHub Actions environment, then copy and paste the results to a new file, and push another commit to your PR so the CI can pass. Be aware of whitespace in between sentences.
+
+Below is what the CI might look like if the results differ. If the results look reasonable, you should
+1. copy and paste the result for that file 
+2. create a new file in `tests/results` in the format of `tests/results/<path/to/file>.<i>.result` where `<i>` should be incremented from the highest number of `<i>.result` in the folder
+
+If the test fails, look for output similar to the following:
+```
+tests/test_examples_run.py 
+============================================================================
+File that produced wrong result: examples/demo/1-hello.pdl
+Actual result (copy everything below this line):
+✂️ ------------------------------------------------------------
+Hello
+Hello
+-------------------------------------------------------------
+
+============================================================================
+File that produced wrong result: examples/demo/2-model-chaining.pdl
+Actual result (copy everything below this line):
+✂️ ------------------------------------------------------------
+Hello
+Hello
+Did you just say Hello?
+Yes, I did. It's a common greeting, similar to how humans might respond when they first interact with an artificial intelligence like me. How can I assist you today?
+-------------------------------------------------------------
+F
+```
+
+Note: Your PR should always set `update_results: false` before merging, though the CI pipeline will not attempt to write the results to your PR even if you forget to set this field to `false`. 
+
+Here's a preview of the current configuration file for Run Examples:
+
+```yaml
+--8<-- "../tests/test_examples_run.yaml"
+```
diff --git a/examples/chatbot/chatbot.pdl b/examples/chatbot/chatbot.pdl
index a53f68efd..3c89de4b9 100644
--- a/examples/chatbot/chatbot.pdl
+++ b/examples/chatbot/chatbot.pdl
@@ -1,4 +1,4 @@
-description: Chatbot
+description: A Chatbot that responds to user's questions
 text:
 # Allow the user to type any question, implicitly adding the question to the context.
 - read:
diff --git a/examples/demo/9-react.pdl b/examples/demo/9-react.pdl
index 80eccde60..dd5742e12 100644
--- a/examples/demo/9-react.pdl
+++ b/examples/demo/9-react.pdl
@@ -1,3 +1,4 @@
+description: Demonstrating a React program
 defs:
   tools:
     data:
diff --git a/examples/react/demo.pdl b/examples/react/demo.pdl
index f50210e2e..fdfbfe2a2 100644
--- a/examples/react/demo.pdl
+++ b/examples/react/demo.pdl
@@ -1,3 +1,4 @@
+description: Demo for a react pattern program
 defs:
   tools:
     data:
diff --git a/tests/results/examples/chatbot/chatbot.ollama_ghactions.result b/tests/results/examples/chatbot/chatbot.1.result
similarity index 100%
rename from tests/results/examples/chatbot/chatbot.ollama_ghactions.result
rename to tests/results/examples/chatbot/chatbot.1.result
diff --git a/tests/results/examples/code/code-eval.ollama_ghactions.result b/tests/results/examples/code/code-eval.1.result
similarity index 100%
rename from tests/results/examples/code/code-eval.ollama_ghactions.result
rename to tests/results/examples/code/code-eval.1.result
diff --git a/tests/results/examples/code/code-eval.2.result b/tests/results/examples/code/code-eval.2.result
new file mode 100644
index 000000000..31fa397a7
--- /dev/null
+++ b/tests/results/examples/code/code-eval.2.result
@@ -0,0 +1,27 @@
+@SuppressWarnings("unchecked")
+public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {
+  Map<String, String> offsetMap;
+  if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {
+    offsetMap = new HashMap<>();
+  } else {
+    offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);
+  }
+  return offsetMap;
+}
+The provided Java function `deserializeOffsetMap` is part of the StreamSets Data Collector (datacollector) project, specifically located in the `stagesupport/src/main/java/com/` directory. This function is named `OffsetUtil.deserializeOffsetMap`. Here's a breakdown of its purpose and functionality:
+1. **Purpose**: The primary goal of this method is to deserialize a JSON string into a `Map<String, String>` object representing an offset map. Offsets are typically used in data processing pipelines to track the position within input data streams.
+2. **Parameters**:
+   - `lastSourceOffset`: A required parameter that represents the JSON-encoded offset map as a string. This is the source of the deserialization process.
+3. **Return Type**: The method returns a `Map<String, String>`. This means it will return a map where keys are strings and values are also strings.
+4. **Code Explanation**:
+   - `@SuppressWarnings("unchecked")`: This annotation tells the compiler to ignore potential warnings related to unchecked casts. In this context, it's safe because we're working with `Map` objects, which are known to be compatible with `java.lang.Object`.
+
+   - `public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException`: This is the method signature. It declares a public static method named `deserializeOffsetMap`, which takes one parameter (a string representing JSON-encoded offset map data), and returns a `Map<String, String>`. The method also specifies that it may throw an `IOException`.
+   - `Map<String, String> offsetMap;`: This line initializes a variable called `offsetMap` to hold the deserialized map.
+   - `if (lastSourceOffset == null || lastSourceOffset.isEmpty()) { ... } else { ... }`: The method uses a conditional statement to check if the input string is either `null` or empty. If so, it creates and initializes an empty `HashMap<String, String>` as `offsetMap`.
+   - `offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);`: When the condition in the previous line evaluates to false (i.e., when `lastSourceOffset` is not null or empty), this line performs the actual deserialization using Jackson's `JSON_MAPPER`. It reads the input string as a JSON object and converts it into a `Map<String, String>`.
+   - `return offsetMap;`: Finally, after processing the input string (either creating an empty map if necessary or performing the JSON-to-map conversion), this line returns the resulting `offsetMap` to the caller.
+In summary, `deserializeOffsetMap` is a utility function that safely converts a JSON-encoded offset map string into a `Map<String, String>`. It handles both null and empty inputs gracefully by initializing an empty map when necessary before performing the actual deserialization using Jackson's JSON mapper.
+EVALUATION:
+The similarity (Levenshtein) between this answer and the ground truth is:
+0.1734802701741912
\ No newline at end of file
diff --git a/tests/results/examples/code/code-json.ollama_ghactions.result b/tests/results/examples/code/code-json.1.result
similarity index 100%
rename from tests/results/examples/code/code-json.ollama_ghactions.result
rename to tests/results/examples/code/code-json.1.result
diff --git a/tests/results/examples/code/code.ollama_ghactions.result b/tests/results/examples/code/code.1.result
similarity index 100%
rename from tests/results/examples/code/code.ollama_ghactions.result
rename to tests/results/examples/code/code.1.result
diff --git a/tests/results/examples/code/code.2.result b/tests/results/examples/code/code.2.result
new file mode 100644
index 000000000..2bd33a88c
--- /dev/null
+++ b/tests/results/examples/code/code.2.result
@@ -0,0 +1,21 @@
+@SuppressWarnings("unchecked")
+public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {
+  Map<String, String> offsetMap;
+  if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {
+    offsetMap = new HashMap<>();
+  } else {
+    offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);
+  }
+  return offsetMap;
+}
+The provided Java function `deserializeOffsetMap` is part of the StreamSets Data Collector (datacollector) repository, specifically located in the `stagesupport/src/main/java/com/` directory. This function is named `OffsetUtil.deserializeOffsetMap`. Here's a breakdown of its purpose and functionality:
+1. **Purpose**: The primary goal of this method is to deserialize a JSON string into a `Map<String, String>` object. It assumes that the input JSON represents an offset map with keys as strings (e.g., "record_id", "timestamp") and values also being strings (e.g., "1234567890" or "2022-01-01T00:00:00Z").
+2. **Input**: The method takes a single parameter, `lastSourceOffset`, which is expected to be a JSON string representing an offset map. If this input is null or empty (i.e., `null` or `""`), the function initializes and returns a new `HashMap<String, String>` with no entries.
+3. **Deserialization**:
+   - When `lastSourceOffset` is not null or empty:
+     - The method uses Jackson's `JSON_MAPPER`, an instance of `ObjectMapper`, to parse the JSON string into a `Map<String, String>`. This is done using the `readValue()` method with `Map.class` as the target class.
+   - If `lastSourceOffset` is null or empty:
+     - The function initializes and returns a new `HashMap<String, String>` with no entries (i.e., an empty map).
+4. **Return Value**: Regardless of whether the input was null or non-empty, this method always returns a `Map<String, String>`. This ensures that the caller can safely use the returned object without worrying about potential null values.
+5. **Exception Handling**: The function does not explicitly handle `IOException`. However, since it's called within the context of StreamSets Data Collector (datacollector), any underlying I/O issues are likely to be managed by the framework itself.
+In summary, this method serves as a utility for converting JSON strings into Map<String, String> objects representing offset data. It ensures that null or empty inputs result in an empty map, while non-empty inputs are parsed using Jackson's `JSON_MAPPER`.
\ No newline at end of file
diff --git a/tests/results/examples/demo/10-sdg.ollama_ghactions.result b/tests/results/examples/demo/10-sdg.1.result
similarity index 100%
rename from tests/results/examples/demo/10-sdg.ollama_ghactions.result
rename to tests/results/examples/demo/10-sdg.1.result
diff --git a/tests/results/examples/demo/3-def-use.ollama_ghactions.result b/tests/results/examples/demo/3-def-use.1.result
similarity index 100%
rename from tests/results/examples/demo/3-def-use.ollama_ghactions.result
rename to tests/results/examples/demo/3-def-use.1.result
diff --git a/tests/results/examples/demo/4-function.ollama_ghactions.result b/tests/results/examples/demo/4-function.1.result
similarity index 100%
rename from tests/results/examples/demo/4-function.ollama_ghactions.result
rename to tests/results/examples/demo/4-function.1.result
diff --git a/tests/results/examples/demo/5-code-eval.ollama_ghactions.result b/tests/results/examples/demo/5-code-eval.1.result
similarity index 100%
rename from tests/results/examples/demo/5-code-eval.ollama_ghactions.result
rename to tests/results/examples/demo/5-code-eval.1.result
diff --git a/tests/results/examples/demo/6-code-json.1.result b/tests/results/examples/demo/6-code-json.1.result
index a73c91938..0ab386980 100644
--- a/tests/results/examples/demo/6-code-json.1.result
+++ b/tests/results/examples/demo/6-code-json.1.result
@@ -1 +1 @@
-{"input": {"source_code": "@SuppressWarnings(\"unchecked\")\npublic static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {\n  Map<String, String> offsetMap;\n  if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {    \n    offsetMap = new HashMap<>();  \n  } else {\n    offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);  \n  }\n  return offsetMap;\n}\n", "repo_info": {"repo": "streamsets/datacollector", "path": "stagesupport/src/main/java/com/.../OffsetUtil.java", "function_name": "OffsetUtil.deserializeOffsetMap"}}, "output": "The provided Java code is a static method named `deserializeOffsetMap` within the `OffsetUtil` class in the StreamSets DataCollector repository's `stagesupport/src/main/java/com/` directory, specifically at `com.streamsets.datacollector.onprem.plugin.kafka.offset.OffsetUtil.java`. This method is designed to deserialize a JSON string containing offset information into a `Map<String, String>` object.\n\nHere's an explanation of the code:\n\n1. **Method Signature:**\n   ```java\n   @SuppressWarnings(\"unchecked\")\n   public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException;\n   ```\n   - The method is declared as `public`, `static`, and returns a `Map<String, String>`.\n   - It takes one parameter: `lastSourceOffset` of type `String`.\n   - The method throws an `IOException`.\n\n2. **Null and Empty Check:**\n   ```java\n   if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {\n     offsetMap = new HashMap<>();  \n   } else {\n     // ...\n   }\n   ```\n   - If the input string is either `null` or empty, a new `HashMap<String, String>` named `offsetMap` is created and assigned to it.\n\n3. 
**Deserialization:**\n   ```java\n   offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);\n   ```\n   - If the input string isn't null or empty, the method uses a `JSON_MAPPER` object (assumed to be an instance of a JSON parser like Jackson's `ObjectMapper`) to deserialize it into a `Map<String, String>`. The `readValue()` function is called with two arguments:\n     1. The input string (`lastSourceOffset`).\n     2. A type argument `Map.class`, indicating that the method expects a map as output.\n\n4. **Return Statement:**\n   ```java\n   return offsetMap;\n   ```\n   - After processing, the deserialized or default-constructed `offsetMap` is returned.\n\nIn summary, this function takes a JSON string representing an offset map and returns it as a `Map<String, String>`. If no input string is provided (null or empty), it creates a new empty map; otherwise, it deserializes the given JSON string into a map using a JSON parser. The `@SuppressWarnings(\"unchecked\")` annotation warns about potential unchecked casts in the code but doesn't affect its functionality since `Map<String, String>` is a known type.", "metric": 0.19340463458110513}
\ No newline at end of file
+{"input": {"source_code": "@SuppressWarnings(\"unchecked\")\npublic static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {\n  Map<String, String> offsetMap;\n  if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {    \n    offsetMap = new HashMap<>();  \n  } else {\n    offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);  \n  }\n  return offsetMap;\n}\n", "repo_info": {"repo": "streamsets/datacollector", "path": "stagesupport/src/main/java/com/.../OffsetUtil.java", "function_name": "OffsetUtil.deserializeOffsetMap"}}, "output": "The provided Java function `deserializeOffsetMap` is part of the StreamSets Data Collector (datacollector) repository, specifically located in the `stagesupport/src/main/java/com/` directory. This function is named `OffsetUtil.deserializeOffsetMap`. Here's a breakdown of its purpose and functionality:\n\n1. **Purpose**: The primary goal of this method is to deserialize a JSON string into a `Map<String, String>` object. It assumes that the input JSON represents an offset map with keys as strings (e.g., \"record_id\", \"timestamp\") and values also being strings (e.g., \"1234567890\" or \"2022-01-01T00:00:00Z\").\n\n2. **Input**: The method takes a single parameter, `lastSourceOffset`, which is expected to be a JSON string representing an offset map. If this input is null or empty (i.e., `null` or `\"\"`), the function initializes and returns a new `HashMap<String, String>` with no entries.\n\n3. **Deserialization**:\n   - When `lastSourceOffset` is not null or empty:\n     - The method uses Jackson's `JSON_MAPPER`, an instance of `ObjectMapper`, to parse the JSON string into a `Map<String, String>`. This is done using the `readValue()` method with `Map.class` as the target class.\n   - If `lastSourceOffset` is null or empty:\n     - The function initializes and returns a new `HashMap<String, String>` with no entries (i.e., an empty map).\n\n4. 
**Return Value**: Regardless of whether the input was null or non-empty, this method always returns a `Map<String, String>`. This ensures that the caller can safely use the returned object without worrying about potential null values.\n\n5. **Exception Handling**: The function does not explicitly handle `IOException`. However, since it's called within the context of StreamSets Data Collector (datacollector), any underlying I/O issues are likely to be managed by the framework itself.\n\nIn summary, this method serves as a utility for converting JSON strings into Map<String, String> objects representing offset data. It ensures that null or empty inputs result in an empty map, while non-empty inputs are parsed using Jackson's `JSON_MAPPER`.", "metric": 0.20641762452107282}
\ No newline at end of file
diff --git a/tests/results/examples/demo/6-code-json.ollama_ghactions.result b/tests/results/examples/demo/6-code-json.ollama_ghactions.result
deleted file mode 100644
index 0ab386980..000000000
--- a/tests/results/examples/demo/6-code-json.ollama_ghactions.result
+++ /dev/null
@@ -1 +0,0 @@
-{"input": {"source_code": "@SuppressWarnings(\"unchecked\")\npublic static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {\n  Map<String, String> offsetMap;\n  if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {    \n    offsetMap = new HashMap<>();  \n  } else {\n    offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);  \n  }\n  return offsetMap;\n}\n", "repo_info": {"repo": "streamsets/datacollector", "path": "stagesupport/src/main/java/com/.../OffsetUtil.java", "function_name": "OffsetUtil.deserializeOffsetMap"}}, "output": "The provided Java function `deserializeOffsetMap` is part of the StreamSets Data Collector (datacollector) repository, specifically located in the `stagesupport/src/main/java/com/` directory. This function is named `OffsetUtil.deserializeOffsetMap`. Here's a breakdown of its purpose and functionality:\n\n1. **Purpose**: The primary goal of this method is to deserialize a JSON string into a `Map<String, String>` object. It assumes that the input JSON represents an offset map with keys as strings (e.g., \"record_id\", \"timestamp\") and values also being strings (e.g., \"1234567890\" or \"2022-01-01T00:00:00Z\").\n\n2. **Input**: The method takes a single parameter, `lastSourceOffset`, which is expected to be a JSON string representing an offset map. If this input is null or empty (i.e., `null` or `\"\"`), the function initializes and returns a new `HashMap<String, String>` with no entries.\n\n3. **Deserialization**:\n   - When `lastSourceOffset` is not null or empty:\n     - The method uses Jackson's `JSON_MAPPER`, an instance of `ObjectMapper`, to parse the JSON string into a `Map<String, String>`. This is done using the `readValue()` method with `Map.class` as the target class.\n   - If `lastSourceOffset` is null or empty:\n     - The function initializes and returns a new `HashMap<String, String>` with no entries (i.e., an empty map).\n\n4. 
**Return Value**: Regardless of whether the input was null or non-empty, this method always returns a `Map<String, String>`. This ensures that the caller can safely use the returned object without worrying about potential null values.\n\n5. **Exception Handling**: The function does not explicitly handle `IOException`. However, since it's called within the context of StreamSets Data Collector (datacollector), any underlying I/O issues are likely to be managed by the framework itself.\n\nIn summary, this method serves as a utility for converting JSON strings into Map<String, String> objects representing offset data. It ensures that null or empty inputs result in an empty map, while non-empty inputs are parsed using Jackson's `JSON_MAPPER`.", "metric": 0.20641762452107282}
\ No newline at end of file
diff --git a/tests/results/examples/demo/7-chatbot-roles.ollama_ghactions.result b/tests/results/examples/demo/7-chatbot-roles.1.result
similarity index 100%
rename from tests/results/examples/demo/7-chatbot-roles.ollama_ghactions.result
rename to tests/results/examples/demo/7-chatbot-roles.1.result
diff --git a/tests/results/examples/demo/9-react.2.result b/tests/results/examples/demo/9-react.2.result
new file mode 100644
index 000000000..14c95d61f
--- /dev/null
+++ b/tests/results/examples/demo/9-react.2.result
@@ -0,0 +1,26 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+
+
+Thought: Henry Hudson discovered the Hudson River in 1609. I need to find out when he was born.
+
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson birthdate"}}]
+Observation: Page id "henry hudson birthday" does not match any pages. Try another id!
+Apologies for the confusion, but it seems there's no specific information available about Henry Hudson's exact birthdate. However, he is estimated to have been born around 1565.
+
+
+Thought: Since I can't find an exact birthdate, I'll use the approximate year and calculate based on that.
+
+
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+In the year 2025, it would be approximately 460 years ago that Henry Hudson was born.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
+Observation: 
\ No newline at end of file
diff --git a/tests/results/examples/demo/9-react.3.result b/tests/results/examples/demo/9-react.3.result
new file mode 100644
index 000000000..a670f1757
--- /dev/null
+++ b/tests/results/examples/demo/9-react.3.result
@@ -0,0 +1,24 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+Thought: Henry Hudson discovered the Hudson River in 1609. I need to find out when he was born.
+
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson birthdate"}}]
+Observation: Page id "henry hudson birthday" does not match any pages. Try another id!
+Apologies for the confusion, but it seems there's no specific information available about Henry Hudson's exact birthdate. However, he is estimated to have been born around 1565.
+
+
+Thought: Since I can't find an exact birthdate, I'll use the approximate year and calculate based on that.
+
+
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+In the year 2025, it would be approximately 460 years ago that Henry Hudson was born.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
+Observation: 
diff --git a/tests/results/examples/demo/9-react.4.result b/tests/results/examples/demo/9-react.4.result
new file mode 100644
index 000000000..679bcf2cd
--- /dev/null
+++ b/tests/results/examples/demo/9-react.4.result
@@ -0,0 +1,14 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+Thought: Henry Hudson discovered the Hudson River in 1609. He was born around 1565.
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+Henry Hudson was born around 1565, so he was approximately 460 years ago.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
+Observation: 
diff --git a/tests/results/examples/fibonacci/fib.ollama_ghactions.result b/tests/results/examples/fibonacci/fib.1.result
similarity index 100%
rename from tests/results/examples/fibonacci/fib.ollama_ghactions.result
rename to tests/results/examples/fibonacci/fib.1.result
diff --git a/tests/results/examples/react/demo.2.result b/tests/results/examples/react/demo.2.result
new file mode 100644
index 000000000..f7748937e
--- /dev/null
+++ b/tests/results/examples/react/demo.2.result
@@ -0,0 +1,25 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+
+
+Thought: Henry Hudson discovered the Hudson River in 1609. I need to find out when he was born.
+
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson birthdate"}}]
+Observation: Page id "henry hudson birthday" does not match any pages. Try another id!
+Apologies for the confusion, but it seems there's no specific information available about Henry Hudson's exact birthdate. However, he is estimated to have been born around 1565.
+
+
+Thought: Since I can't find an exact birthdate, I'll use the approximate year and calculate based on that.
+
+
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+In the year 2025, it would be approximately 460 years ago that Henry Hudson was born.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
\ No newline at end of file
diff --git a/tests/results/examples/react/demo.3.result b/tests/results/examples/react/demo.3.result
new file mode 100644
index 000000000..440e8fcee
--- /dev/null
+++ b/tests/results/examples/react/demo.3.result
@@ -0,0 +1,18 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+Thought: Henry Hudson discovered the Hudson River in 1609. I need to find out when he was born.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson birthdate"}}]
+Observation: Page id "henry hudson birthday" does not match any pages. Try another id!
+Thought: I couldn't find a specific birthdate for Henry Hudson, but it's estimated he was born around 1565.
+
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+Thought: Henry Hudson was born around 1565, so that's approximately 460 years ago.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
diff --git a/tests/results/examples/react/demo.4.result b/tests/results/examples/react/demo.4.result
new file mode 100644
index 000000000..eda5d82e1
--- /dev/null
+++ b/tests/results/examples/react/demo.4.result
@@ -0,0 +1,23 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+Thought: Henry Hudson discovered the Hudson River in 1609. I need to find out when he was born.
+
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson birthdate"}}]
+Observation: Page id "henry hudson birthday" does not match any pages. Try another id!
+Apologies for the confusion, but it seems there's no specific information available about Henry Hudson's exact birthdate. However, he is estimated to have been born around 1565.
+
+
+Thought: Since I can't find an exact birthdate, I'll use the approximate year and calculate based on that.
+
+
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+In the year 2025, it would be approximately 460 years ago that Henry Hudson was born.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
diff --git a/tests/results/examples/react/demo.5.result b/tests/results/examples/react/demo.5.result
new file mode 100644
index 000000000..ce67247d3
--- /dev/null
+++ b/tests/results/examples/react/demo.5.result
@@ -0,0 +1,13 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+Thought: Henry Hudson discovered the Hudson River in 1609. He was born around 1565.
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+Henry Hudson was born around 1565, so he was approximately 460 years ago.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
diff --git a/tests/results/examples/react/demo.6.result b/tests/results/examples/react/demo.6.result
new file mode 100644
index 000000000..cf6b881cb
--- /dev/null
+++ b/tests/results/examples/react/demo.6.result
@@ -0,0 +1,23 @@
+How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching for a birthday for a person, simply ask for the name of that person.
+Thought: I need to find out who discovered the Hudson River and then calculate how many years ago their birthdate was.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}]
+Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States.
+In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. 
+On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown.
+
+
+Thought: Henry Hudson discovered the Hudson River in 1609. I need to find out when he was born.
+Action:
+[{"name": "Search", "arguments": {"topic": "Henry Hudson birthdate"}}]
+Observation: Page id "henry hudson birthday" does not match any pages. Try another id!
+Thought: I couldn't find a specific birthdate for Henry Hudson, but it's estimated he was born around 1565.
+
+Action:
+[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}]
+Observation: 460
+Thought: Henry Hudson was born around 1565, so in 2025, it has been approximately 460 years.
+
+Answer: It has been about 460 years since the birth of Henry Hudson.Action:
+[{"name": "Finish", "arguments": {"topic": "460"}}]
+
diff --git a/tests/results/examples/teacher/teacher.ollama_ghactions.result b/tests/results/examples/teacher/teacher.1.result
similarity index 100%
rename from tests/results/examples/teacher/teacher.ollama_ghactions.result
rename to tests/results/examples/teacher/teacher.1.result
diff --git a/tests/results/examples/tutorial/calling_llm_with_input_messages_var.ollama_ghactions.result b/tests/results/examples/tutorial/calling_llm_with_input_messages_var.1.result
similarity index 100%
rename from tests/results/examples/tutorial/calling_llm_with_input_messages_var.ollama_ghactions.result
rename to tests/results/examples/tutorial/calling_llm_with_input_messages_var.1.result
diff --git a/tests/results/examples/tutorial/defs.1.result b/tests/results/examples/tutorial/defs.1.result
index c9646a004..d5ba84ddc 100644
--- a/tests/results/examples/tutorial/defs.1.result
+++ b/tests/results/examples/tutorial/defs.1.result
@@ -1,2 +1,2 @@
 'J'aime Paris !'
-The translation of "I love Madrid!" into Spanish is: "Me encanta Madrid."
\ No newline at end of file
+The translation of "I love Madrid!" into Spanish is: "Me encanta Madrid."
diff --git a/tests/results/examples/tutorial/defs.ollama_ghactions.result b/tests/results/examples/tutorial/defs.ollama_ghactions.result
deleted file mode 100644
index 5fd7a7220..000000000
--- a/tests/results/examples/tutorial/defs.ollama_ghactions.result
+++ /dev/null
@@ -1,2 +0,0 @@
-'J'adore Paris !'
-The translation of "I love Madrid!" into Spanish is: "Me encanta Madrid!"
\ No newline at end of file
diff --git a/tests/results/examples/tutorial/function_definition.ollama_ghactions.result b/tests/results/examples/tutorial/function_definition.1.result
similarity index 100%
rename from tests/results/examples/tutorial/function_definition.ollama_ghactions.result
rename to tests/results/examples/tutorial/function_definition.1.result
diff --git a/tests/results/examples/tutorial/function_empty_context.ollama_ghactions.result b/tests/results/examples/tutorial/function_empty_context.1.result
similarity index 100%
rename from tests/results/examples/tutorial/function_empty_context.ollama_ghactions.result
rename to tests/results/examples/tutorial/function_empty_context.1.result
diff --git a/tests/results/examples/tutorial/input_stdin.0.result b/tests/results/examples/tutorial/input_stdin.0.result
index 4270aa50d..d3f68b23e 100644
--- a/tests/results/examples/tutorial/input_stdin.0.result
+++ b/tests/results/examples/tutorial/input_stdin.0.result
@@ -1,2 +1,2 @@
 The following will prompt the user on stdin.
-Hello
\ No newline at end of file
+What is APR?
\ No newline at end of file
diff --git a/tests/results/examples/tutorial/parser_regex_code.ollama_ghactions.result b/tests/results/examples/tutorial/parser_regex_code.1.result
similarity index 100%
rename from tests/results/examples/tutorial/parser_regex_code.ollama_ghactions.result
rename to tests/results/examples/tutorial/parser_regex_code.1.result
diff --git a/tests/results/examples/tutorial/programs/chatbot.ollama_ghactions.result b/tests/results/examples/tutorial/programs/chatbot.1.result
similarity index 100%
rename from tests/results/examples/tutorial/programs/chatbot.ollama_ghactions.result
rename to tests/results/examples/tutorial/programs/chatbot.1.result
diff --git a/tests/results/examples/tutorial/programs/code-json.ollama_ghactions.result b/tests/results/examples/tutorial/programs/code-json.1.result
similarity index 100%
rename from tests/results/examples/tutorial/programs/code-json.ollama_ghactions.result
rename to tests/results/examples/tutorial/programs/code-json.1.result
diff --git a/tests/results/examples/tutorial/programs/weather.ollama_ghactions.result b/tests/results/examples/tutorial/programs/weather.1.result
similarity index 100%
rename from tests/results/examples/tutorial/programs/weather.ollama_ghactions.result
rename to tests/results/examples/tutorial/programs/weather.1.result
diff --git a/tests/results/pdl-live-react/src-tauri/tests/cli/code-python.0.result b/tests/results/pdl-live-react/src-tauri/tests/cli/code-python.0.result
new file mode 100644
index 000000000..73f1631b5
--- /dev/null
+++ b/tests/results/pdl-live-react/src-tauri/tests/cli/code-python.0.result
@@ -0,0 +1 @@
+{'foo': 3}
\ No newline at end of file
diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py
index 3d699ed9d..6936147bc 100644
--- a/tests/test_examples_run.py
+++ b/tests/test_examples_run.py
@@ -2,303 +2,334 @@
 import os
 import pathlib
 import random
-from dataclasses import dataclass
-from typing import Optional
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, List, Optional
 
+import yaml
 from pytest import CaptureFixture, MonkeyPatch
 
 from pdl import pdl
 from pdl.pdl_ast import ScopeType
-from pdl.pdl_dumper import block_to_dict
 from pdl.pdl_lazy import PdlDict
 from pdl.pdl_parser import PDLParseError
 
-# test_examples_run.py runs the examples and compares the results
-# to the expected results in tests/results/examples
-
-UPDATE_RESULTS = False
-RESULTS_VERSION = 1
-OLLAMA_GHACTIONS_RESULTS_ENV_VAR = os.getenv("OLLAMA_GHACTIONS_RESULTS", "")
-OLLAMA_GHACTIONS_RESULTS = False
-if OLLAMA_GHACTIONS_RESULTS_ENV_VAR.lower().strip() == "true":
-    OLLAMA_GHACTIONS_RESULTS = True
-
-TO_SKIP = {
-    str(name)
-    for name in [
-        # Requires dataset dependency
-        pathlib.Path("examples") / "cldk" / "cldk-assistant.pdl",
-        pathlib.Path("examples") / "gsm8k" / "gsm8.pdl",
-        pathlib.Path("examples") / "gsm8k" / "gsm8k-plan.pdl",
-        pathlib.Path("examples") / "gsm8k" / "gsm8k-plan-few-shots.pdl",
-        pathlib.Path("examples") / "gsm8k" / "gsm8k-tot-few-shot.pdl",
-        pathlib.Path("examples") / "gsm8k" / "gsm8k-tot-multiplan.pdl",
-        pathlib.Path("examples") / "gsm8k" / "gsm8k-tot.pdl",
-        # Requires installation dependencies
-        pathlib.Path("examples") / "intrinsics" / "demo-hallucination.pdl",
-        pathlib.Path("examples") / "tutorial" / "programs" / "demo-hallucination.pdl",
-        pathlib.Path("pdl-live-react")
-        / "src-tauri"
-        / "tests"
-        / "cli"
-        / "code-python.pdl",
-        # Skip RAG examples
-        pathlib.Path("examples") / "rag" / "pdf_index.pdl",
-        pathlib.Path("examples") / "rag" / "pdf_query.pdl",
-        pathlib.Path("examples")
-        / "rag"
-        / "rag_library1.pdl",  # (This is glue to Python, it doesn't "run" alone)
-        # Skip structure decoding example (Jing doesn't have WATSONX API KEY)
-        pathlib.Path("examples") / "tutorial" / "structured_decoding.pdl",
-        # Output result include trace (and thus timing) for some reason. Investigate why
-        pathlib.Path("examples") / "react" / "react_call.pdl",
-        pathlib.Path("pdl-live-react") / "demos" / "error.pdl",
-        pathlib.Path("pdl-live-react") / "demos" / "demo1.pdl",
-        pathlib.Path("pdl-live-react") / "demos" / "demo2.pdl",
-        pathlib.Path("pdl-live-react")
-        / "src-tauri"
-        / "tests"
-        / "cli"
-        / "read-stdin.pdl",
-        # For now, skip the granite-io examples
-        pathlib.Path("examples") / "granite-io" / "granite_io_hallucinations.pdl",
-        pathlib.Path("examples") / "granite-io" / "granite_io_openai.pdl",
-        pathlib.Path("examples") / "granite-io" / "granite_io_thinking.pdl",
-        pathlib.Path("examples") / "granite-io" / "granite_io_transformers.pdl",
-        # Not executable without optimizer
-        pathlib.Path("examples") / "optimizer" / "mbpp.pdl",
-        pathlib.Path("examples") / "optimizer" / "fever.pdl",
-        pathlib.Path("examples") / "optimizer" / "gsm8k.pdl",
-    ]
-}
+EXAMPLES_RUN_CONFIG_FILE = os.getenv(
+    "EXAMPLES_RUN_FILE", "tests/test_examples_run.yaml"
+)
 
 
 @dataclass
 class InputsType:
+    """
+    Inputs to the PDL program for testing
+    """
+
     stdin: Optional[str] = None
     scope: Optional[ScopeType] = None
 
 
-TESTS_WITH_INPUT: dict[str, InputsType] = {
-    str(name): inputs
-    for name, inputs in {
-        pathlib.Path("examples")
-        / "tutorial"
-        / "programs"
-        / "chatbot.pdl": InputsType(stdin="What is APR?\nyes\n"),
-        pathlib.Path("examples")
-        / "tutorial"
-        / "input_stdin.pdl": InputsType(stdin="Hello\n"),
-        pathlib.Path("examples")
-        / "tutorial"
-        / "input_stdin_multiline.pdl": InputsType(stdin="Hello\nBye\n"),
-        pathlib.Path("examples")
-        / "input"
-        / "input_test1.pdl": InputsType(stdin="Hello\n"),
-        pathlib.Path("examples")
-        / "input"
-        / "input_test2.pdl": InputsType(stdin="Hello\n"),
-        pathlib.Path("examples")
-        / "chatbot"
-        / "chatbot.pdl": InputsType(stdin="What is APR?\nyes\n"),
-        pathlib.Path("examples")
-        / "demo"
-        / "7-chatbot-roles.pdl": InputsType(stdin="What is APR?\nquit\n"),
-        pathlib.Path("examples")
-        / "tutorial"
-        / "free_variables.pdl": InputsType(scope=PdlDict({"something": "ABC"})),
-        pathlib.Path("tests")
-        / "data"
-        / "optimizer_gsm8k.pdl": InputsType(
-            scope=PdlDict(
-                {
-                    "model": "watsonx_text/ibm/granite-3-8b-instruct",
-                    "prompt_pattern": "cot",
-                    "num_demonstrations": 0,
-                    "demonstrations": [],
-                    "question": "The sky currently has 4 times as many cirrus clouds as cumulus clouds, and 12 times as many cumulus clouds as cumulonimbus clouds. If the sky currently has 3 cumulonimbus clouds, how many cirrus clouds are in the sky at this moment?",
-                }
-            )
-        ),
-    }.items()
-}
-
-
-EXPECTED_PARSE_ERROR = [
-    pathlib.Path("tests") / "data" / "line" / "hello.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello1.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello4.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello7.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello8.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello10.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello11.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello31.pdl",
-]
-
-EXPECTED_RUNTIME_ERROR = [
-    pathlib.Path("examples") / "callback" / "repair_prompt.pdl",
-    pathlib.Path("examples") / "tutorial" / "type_list.pdl",
-    pathlib.Path("examples") / "tutorial" / "type_checking.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello12.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello13.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello14.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello15.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello16.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello17.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello18.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello19.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello20.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello21.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello22.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello23.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello24.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello25.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello26.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello27.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello28.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello29.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello3.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello30.pdl",
-    pathlib.Path("tests") / "data" / "line" / "hello9.pdl",
-]
-
-
-def __write_to_results_file(
-    dir_name: pathlib.Path, filename: str, content: str
-) -> None:
+class ExecutionErrorCode(Enum):
+    """
+    ExecutionErrorCode describes the execution result of the PDL file
+    """
+
+    NO_ERROR = 1
+    PARSE_ERROR = 2
+    RUNTIME_ERROR = 3
+
+
+@dataclass
+class ExecutionResult:
+    """
+    ExecutionResult captures the execution result of a PDL file
+    """
+
+    result: str | None = None
+    error_code: ExecutionErrorCode | None = None
+
+
+@dataclass
+class ExpectedResult:
     """
-    Write to results file
+    ExpectedResult captures the expected result of a PDL file.
+    Non-deterministic programs contain more than one valid result
     """
 
-    dir_name.mkdir(parents=True, exist_ok=True)
-    with open(dir_name / filename, "w", encoding="utf-8") as result_file:
-        result_file.write(content)
+    results: List[str] | None = None
+    error_code: ExecutionErrorCode | None = None
+
+    def compare_to_execution(self, execution_result: ExecutionResult) -> bool:
+        """
+        Returns True if the execution outcome matches this expectation:
+        the error codes must be equal and, when no error is expected, the
+        stripped output must equal one of the stripped expected results
+        """
+
+        # A different error code can never match
+        if execution_result.error_code != self.error_code:
+            return False
+
+        # An expected parse/runtime error has no output to compare
+        if self.error_code in (
+            ExecutionErrorCode.PARSE_ERROR,
+            ExecutionErrorCode.RUNTIME_ERROR,
+        ):
+            return True
 
+        # Otherwise compare the output against every accepted result
+        produced = execution_result.result
+        if produced is None or self.results is None:
+            return False
+        produced = produced.strip()
+        return any(produced == accepted.strip() for accepted in self.results)
 
-def __find_and_compare_results(
-    test_file_name: pathlib.Path, actual_result: str
-) -> bool:
+    def get_next_results_version(self) -> int:
+        """
+        Returns the version number for the next result file of this program:
+        the number of expected results collected so far (0 when there are none).
+        NOTE(review): assumes result files are numbered contiguously from 0
+        """
+
+        return len(self.results) if self.results is not None else 0
+
+
+@dataclass
+class FailedResults:
     """
-    Look through test_file_name's parent directory and see if any of *.result
-    matches the actual output
+    FailedResults are all the files that failed
     """
 
-    result_dir_name = pathlib.Path(".") / "tests" / "results" / test_file_name.parent
-    expected_files = result_dir_name.glob(test_file_name.stem + ".*.result")
+    wrong_results: Dict[str, str] = field(default_factory=lambda: {})
+    unexpected_parse_error: List[str] = field(default_factory=lambda: [])
+    unexpected_runtime_error: List[str] = field(default_factory=lambda: [])
+
+
+# pylint: disable=too-many-instance-attributes
+class ExamplesRun:
+    """
+    ExamplesRun outlines PDL files
+    - to skip
+    - requires inputs
+    - expects parse error
+    - expects runtime error
+    by loading the configuration from EXAMPLES_RUN_FILE
+    and runs the test
+    """
+
+    def __init__(self, monkeypatch: MonkeyPatch) -> None:
+        """
+        Loads the run configuration from EXAMPLES_RUN_CONFIG_FILE.
+        Keys that are missing or null fall back to their defaults
+        """
+
+        # Pytest
+        self.monkeypatch = monkeypatch
+
+        # Load content from EXAMPLES_RUN_FILE (an empty file yields no settings)
+        with open(EXAMPLES_RUN_CONFIG_FILE, "r", encoding="utf-8") as file:
+            content = yaml.safe_load(file) or {}
+
+        # Configuration
+        self.update_results: bool = content.get("update_results") or False
+
+        # File manipulation
+        self.check: List[str] = content.get("check") or []
+        if not self.check:
+            # Update files to check iff check is specified, else take every PDL file
+            self.check = [str(f) for f in pathlib.Path(".").glob("**/*.pdl")]
+        self.skip: List[str] = content.get("skip") or []
+        parse_errors = content.get("expected_parse_error")
+        self.expected_parse_error: List[str] = parse_errors or []
+        runtime_errors = content.get("expected_runtime_error")
+        self.expected_runtime_error: List[str] = runtime_errors or []
+
+        # Inputs (stdin and/or scope) for the programs that need them
+        self.with_inputs: Dict[str, InputsType] = {}
+        for filename, inputs_type in (content.get("with_inputs") or {}).items():
+            # A null entry means the program takes no inputs
+            inputs = inputs_type or {}
+            scope = inputs.get("scope")
+            self.with_inputs[filename] = InputsType(
+                stdin=inputs.get("stdin"),
+                scope=PdlDict(scope) if scope is not None else None,
+            )
+
+        # Inits expected results
+        self.expected_results: Dict[str, ExpectedResult] = {}
+        self.__collect_expected_results()
 
-    for expected_file in expected_files:
-        with open(expected_file, "r", encoding="utf-8") as truth_file:
-            expected_result = str(truth_file.read())
-            if str(actual_result).strip() == expected_result.strip():
-                return True
-    return False
+        # Inits execution results for each PDL file
+        self.execution_results: Dict[str, ExecutionResult] = {}
 
+        # Init failed results
+        self.failed_results = FailedResults()
 
-def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
-    actual_parse_error: set[str] = set()
-    actual_runtime_error: set[str] = set()
-    wrong_results = {}
+    def __get_results_dir(self) -> pathlib.Path:
+        return pathlib.Path(".") / "tests" / "results"
 
-    files = pathlib.Path(".").glob("**/*.pdl")
+    def __collect_expected_results(self) -> None:
+        """
+        Collects possible results for programs in self.check
+        """
 
-    for pdl_file_name in files:
+        for file in self.check:
+            expected_result = ExpectedResult()
 
+            if file in self.expected_parse_error:
+                expected_result.error_code = ExecutionErrorCode.PARSE_ERROR
+            elif file in self.expected_runtime_error:
+                expected_result.error_code = ExecutionErrorCode.RUNTIME_ERROR
+            else:
+
+                # Collect possible results
+                res_list = []
+                file_path: pathlib.Path = pathlib.Path(file)
+                result_dir_name = self.__get_results_dir() / file_path.parent
+                expected_files = result_dir_name.glob(file_path.stem + ".*.result")
+
+                for expected_file in expected_files:
+                    with open(expected_file, "r", encoding="utf-8") as truth_file:
+                        content = truth_file.read()
+                        res_list.append(content)
+
+                expected_result.error_code = ExecutionErrorCode.NO_ERROR
+                expected_result.results = res_list
+
+            self.expected_results[file] = expected_result
+
+    def __execute_file(self, pdl_file_name: str) -> None:
+        """
+        Runs a single file and records its result and error code in self.execution_results
+        """
+
+        exec_result = ExecutionResult()
+
+        pdl_file_path = pathlib.Path(pdl_file_name)
         scope: ScopeType = PdlDict({})
-        if str(pdl_file_name) in TO_SKIP:
-            continue
-        if str(pdl_file_name) in TESTS_WITH_INPUT:
-            inputs = TESTS_WITH_INPUT[str(pdl_file_name)]
+
+        # Patch with inputs
+        if pdl_file_name in self.with_inputs:
+            inputs = self.with_inputs[pdl_file_name]
             if inputs.stdin is not None:
-                monkeypatch.setattr(
-                    "sys.stdin",
-                    io.StringIO(inputs.stdin),
-                )
+                self.monkeypatch.setattr("sys.stdin", io.StringIO(inputs.stdin))
             if inputs.scope is not None:
                 scope = inputs.scope
+
         try:
-            random.seed(11)
+            # Execute file
             output = pdl.exec_file(
-                pdl_file_name,
+                pdl_file_path,
                 scope=scope,
                 output="all",
                 config=pdl.InterpreterConfig(batch=0),
             )
-            actual_result = output["result"]
 
-            block_to_dict(output["trace"], json_compatible=True)
-            result_dir_name = (
-                pathlib.Path(".") / "tests" / "results" / pdl_file_name.parent
-            )
+            exec_result.result = str(output["result"])
+            exec_result.error_code = ExecutionErrorCode.NO_ERROR
 
-            if not __find_and_compare_results(pdl_file_name, str(actual_result)):
-
-                if OLLAMA_GHACTIONS_RESULTS:
-                    print(
-                        f"Program {str(pdl_file_name)} requries updating its result on GitHub Actions"
-                    )
-                    print(f"Actual results: {str(actual_result)}")
-                    result_file_name = f"{pdl_file_name.stem}.ollama_ghactions.result"
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
-
-                    # Evaluate the results again. If fails again, then consider this program as failing
-                    if not __find_and_compare_results(
-                        pdl_file_name, str(actual_result)
-                    ):
-                        print(
-                            f"Program {str(pdl_file_name)} failed second time even after generating results from Github Actions. Consider this failing!"
-                        )
-                        wrong_results[str(pdl_file_name)] = {
-                            "actual": str(actual_result),
-                        }
-                    # If evaluating results produces correct result, then this is considered passing
-                    else:
-                        continue
-
-                if UPDATE_RESULTS:
-                    result_file_name = (
-                        f"{pdl_file_name.stem}.{str(RESULTS_VERSION)}.result"
-                    )
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
-
-                wrong_results[str(pdl_file_name)] = {
-                    "actual": str(actual_result),
-                }
         except PDLParseError:
-            actual_parse_error |= {str(pdl_file_name)}
-        except Exception as exc:
-            if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
-                print(f"{pdl_file_name}: {exc}")  # unexpected error: breakpoint
-            actual_runtime_error |= {str(pdl_file_name)}
-            print(exc)
-
-    # Parse errors
-    expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
-    unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
-    assert (
-        len(unexpected_parse_error) == 0
-    ), f"Unexpected parse error: {unexpected_parse_error}"
-
-    # Runtime errors
-    expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
-    unexpected_runtime_error = sorted(
-        list(actual_runtime_error - expected_runtime_error)
-    )
-    assert (
-        len(unexpected_runtime_error) == 0
-    ), f"Unexpected runtime error: {unexpected_runtime_error}"
-
-    # Unexpected valid
-    unexpected_valid = sorted(
-        list(
-            (expected_parse_error - actual_parse_error).union(
-                expected_runtime_error - actual_runtime_error
+            exec_result.error_code = ExecutionErrorCode.PARSE_ERROR
+        except Exception:
+            exec_result.error_code = ExecutionErrorCode.RUNTIME_ERROR
+
+        self.execution_results[pdl_file_name] = exec_result
+
+    def populate_exec_result_for_checks(self) -> None:
+        """
+        Populates the execution result for all files in self.check that are not skipped
+        """
+
+        for file in self.check:
+            if file not in self.skip:
+                self.__execute_file(file)
+
+    def validate_expected_and_actual(self) -> None:
+        """
+        Compares every executed file against its expected result and
+        records the mismatches in self.failed_results.
+        Must be run after populate_exec_result_for_checks
+        """
+
+        mismatched_output: Dict[str, str] = {}
+        parse_failures: List[str] = []
+        runtime_failures: List[str] = []
+
+        for file in self.check:
+            if file in self.skip:
+                continue
+
+            executed = self.execution_results[file]
+            if self.expected_results[file].compare_to_execution(executed):
+                continue
+            # Classify the mismatch by what the execution actually produced
+            if executed.error_code == ExecutionErrorCode.PARSE_ERROR:
+                parse_failures.append(file)
+            elif executed.error_code == ExecutionErrorCode.RUNTIME_ERROR:
+                runtime_failures.append(file)
+            elif executed.result is not None:
+                # No error was raised, so the output itself is wrong
+                mismatched_output[file] = executed.result
+
+        self.failed_results.wrong_results = mismatched_output
+        self.failed_results.unexpected_parse_error = parse_failures
+        self.failed_results.unexpected_runtime_error = runtime_failures
+
+    def write_results(self) -> None:
+        """
+        Writes new results for failed files
+        """
+
+        results_dir = self.__get_results_dir()
+        for file in self.failed_results.wrong_results:
+            next_results_version = str(
+                self.expected_results[file].get_next_results_version()
             )
+            # Mkdir if not exist
+            file_path = pathlib.Path(file)
+            write_file_dir = results_dir / file_path.parent
+            write_file_dir.mkdir(parents=True, exist_ok=True)
+
+            # Write to new file
+            write_file_name = (
+                write_file_dir / f"{file_path.stem}.{next_results_version}.result"
+            )
+            actual_result = self.failed_results.wrong_results[file]
+
+            with open(write_file_name, "w", encoding="utf-8") as f:
+                f.write(actual_result)
+
+
+def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
+    """
+    Runs the test
+    """
+
+    random.seed(11)
+    background = ExamplesRun(monkeypatch)
+
+    background.populate_exec_result_for_checks()
+    background.validate_expected_and_actual()
+
+    if background.update_results:
+        background.write_results()
+
+    # Print the actual results for wrong results
+    for file, actual in background.failed_results.wrong_results.items():
+        print(
+            "\n============================================================================"
         )
-    )
-    assert len(unexpected_valid) == 0, f"Unexpected valid: {unexpected_valid}"
-    # Unexpected results
-    assert len(wrong_results) == 0, f"Wrong results: {wrong_results}"
+        print(f"File that produced wrong result: {file}")
+        print(
+            f"Actual result (copy everything below this line):\n✂️ ------------------------------------------------------------\n{actual}\n-------------------------------------------------------------"
+        )
+
+    assert (
+        len(background.failed_results.unexpected_parse_error) == 0
+    ), f"Unexpected parse error: {background.failed_results.unexpected_parse_error}"
+    assert (
+        len(background.failed_results.unexpected_runtime_error) == 0
+    ), f"Unexpected runtime error: {background.failed_results.unexpected_runtime_error}"
+    assert (
+        len(background.failed_results.wrong_results) == 0
+    ), f"Wrong results: {background.failed_results.wrong_results}"
diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml
new file mode 100644
index 000000000..0aed7e855
--- /dev/null
+++ b/tests/test_examples_run.yaml
@@ -0,0 +1,110 @@
+update_results: false
+check: []
+skip:
+  - examples/cldk/cldk-assistant.pdl
+  - examples/gsm8k/gsm8.pdl
+  - examples/gsm8k/gsm8k-plan.pdl
+  - examples/gsm8k/gsm8k-plan-few-shots.pdl
+  - examples/gsm8k/gsm8k-tot-few-shot.pdl
+  - examples/gsm8k/gsm8k-tot-multiplan.pdl
+  - examples/gsm8k/gsm8k-tot.pdl
+  - examples/intrinsics/demo-hallucination.pdl
+  - examples/tutorial/programs/demo-hallucination.pdl
+  - examples/rag/pdf_index.pdl
+  - examples/rag/pdf_query.pdl
+  - examples/rag/rag_library1.pdl
+  - examples/tutorial/structured_decoding.pdl
+  - examples/react/react_call.pdl
+  - pdl-live-react/src-tauri/tests/cli/code-python.pdl
+  - pdl-live-react/demos/error.pdl
+  - pdl-live-react/demos/demo1.pdl
+  - pdl-live-react/demos/demo2.pdl
+  - pdl-live-react/src-tauri/tests/cli/read-stdin.pdl
+  - examples/granite-io/granite_io_hallucinations.pdl
+  - examples/granite-io/granite_io_openai.pdl
+  - examples/granite-io/granite_io_thinking.pdl
+  - examples/granite-io/granite_io_transformers.pdl
+  - examples/optimizer/mbpp.pdl
+  - examples/optimizer/fever.pdl
+  - examples/optimizer/gsm8k.pdl
+with_inputs:
+  examples/tutorial/programs/chatbot.pdl:
+    stdin: |
+      What is APR?
+      yes
+    scope: null
+  examples/chatbot/chatbot.pdl:
+    stdin: |
+      What is APR?
+      yes
+    scope: null
+  examples/demo/7-chatbot-roles.pdl:
+    stdin: |
+      What is APR?
+      quit
+    scope: null
+  examples/tutorial/input_stdin.pdl:
+    stdin: |
+      What is APR?
+      yes
+    scope: null
+  examples/tutorial/input_stdin_multiline.pdl:
+    stdin: |
+      Hello
+      Bye
+    scope: null
+  examples/input/input_test1.pdl:
+    stdin: |
+      Hello
+    scope: null
+  examples/input/input_test2.pdl:
+    stdin: |
+      Hello
+    scope: null
+  examples/tutorial/free_variables.pdl:
+    stdin: null
+    scope:
+      something: ABC
+  tests/data/optimizer_gsm8k.pdl:
+    stdin: null
+    scope:
+      model: watsonx_text/ibm/granite-3-8b-instruct
+      prompt_pattern: cot
+      num_demonstrations: 0
+      demonstrations: []
+      question: "The sky currently has 4 times as many cirrus clouds as cumulus clouds, and 12 times as many cumulus clouds as cumulonimbus clouds. If the sky currently has 3 cumulonimbus clouds, how many cirrus clouds are in the sky at this moment?"
+
+expected_parse_error:
+  - tests/data/line/hello.pdl
+  - tests/data/line/hello1.pdl
+  - tests/data/line/hello4.pdl
+  - tests/data/line/hello7.pdl
+  - tests/data/line/hello8.pdl
+  - tests/data/line/hello10.pdl
+  - tests/data/line/hello11.pdl
+  - tests/data/line/hello31.pdl
+expected_runtime_error:
+  - examples/callback/repair_prompt.pdl
+  - examples/tutorial/type_list.pdl
+  - examples/tutorial/type_checking.pdl
+  - tests/data/line/hello3.pdl
+  - tests/data/line/hello9.pdl
+  - tests/data/line/hello12.pdl
+  - tests/data/line/hello13.pdl
+  - tests/data/line/hello14.pdl
+  - tests/data/line/hello15.pdl
+  - tests/data/line/hello16.pdl
+  - tests/data/line/hello17.pdl
+  - tests/data/line/hello18.pdl
+  - tests/data/line/hello19.pdl
+  - tests/data/line/hello20.pdl
+  - tests/data/line/hello21.pdl
+  - tests/data/line/hello22.pdl
+  - tests/data/line/hello23.pdl
+  - tests/data/line/hello24.pdl
+  - tests/data/line/hello25.pdl
+  - tests/data/line/hello26.pdl
+  - tests/data/line/hello27.pdl
+  - tests/data/line/hello28.pdl
+  - tests/data/line/hello29.pdl
+  - tests/data/line/hello30.pdl