sktime · biru-codeastromer · Apr 30, 2026
diff --git a/docs/source/user-guide.md b/docs/source/user-guide.md
@@ -26,6 +26,22 @@ pip install -e .
 pip install -e ".[all]"
 ```
 
+### Integrating From An External Agent Project
+
+If you are calling `sktime-mcp` from your own agent codebase, the most reliable setup is an editable install, run from the root of your `sktime-mcp` checkout (not from your agent project):
+
+```bash
+pip install -e .
+```
+
+If you need to import straight from a source checkout before the package is installed, set `PYTHONPATH` to the checkout's `src/` directory as a temporary fallback:
+
+```bash
+PYTHONPATH=/path/to/sktime-mcp/src python your_agent.py
+```
+
+The editable install is the preferred option because it keeps imports consistent across examples, tests, and external agent experiments.
+
 ### Running the Server
 
 Start the MCP server to begin listening for connections:
@@ -184,6 +200,9 @@ Returns a data handle, e.g., `{"handle": "data_xyz"}`.
 
 </details>
 
+!!! tip "Inspect Handle Metadata Before Picking A Model"
+    After `load_data_source`, agents should inspect the returned metadata before choosing a model. Fields such as `rows`, `frequency`, `columns`, `exog_columns`, and `dtypes` already indicate whether the loaded handle matches the intended forecasting workflow.
+
 !!! warning "Absolute Paths Required"
     The server requires **absolute file paths** (e.g., `/home/user/data.csv`). Relative paths may fail depending on where the server was started.
 
@@ -280,6 +299,36 @@ The assistant persists the fitted estimator to the specified path using sktime's
 
 ---
 
+### 5. Cross-Validated Evaluation
+
+**What you want:** Compare candidate forecasters before committing to one.
+
+Ask your assistant for a backtest on a built-in demo dataset:
+
+> *"Evaluate ThetaForecaster on airline using 3 folds."*
+
+The current MCP contract for `evaluate_estimator` is:
+
+- `dataset` must be the name of a **demo dataset**
+- `cv_folds` must be a **positive integer**
+- the tool returns **per-fold rows** with metric columns such as `test_MeanAbsolutePercentageError`
+
+<details>
+<summary>🔧 MCP tool calls (what happens under the hood)</summary>
+
+```json
+{"tool": "instantiate_estimator", "arguments": {"estimator": "ThetaForecaster"}}
+```
+```json
+{"tool": "evaluate_estimator", "arguments": {"estimator_handle": "est_abc123", "dataset": "airline", "cv_folds": 3}}
+```
+
+</details>
+
+For a runnable integration example that combines custom data loading with this evaluation flow, see `examples/08_external_agent_integration.py`.
+
+---
+
 ## 💾 Data Management
 
 ### Supported Data Sources

diff --git a/examples/08_external_agent_integration.py b/examples/08_external_agent_integration.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""
+Example 8: External Agent Integration Workflow
+
+This example shows a minimal pattern for integrating sktime-mcp tool
+functions inside a custom agent or orchestration layer.
+
+It demonstrates:
+1. Loading user-provided data and reading agent-friendly metadata
+2. Fitting and predicting from a custom data handle
+3. Calling evaluate_estimator with the currently supported inputs
+
+Run this example:
+    python examples/08_external_agent_integration.py
+"""
+
+import sys
+
+import pandas as pd
+
+sys.path.insert(0, "src")  # source-checkout fallback; "src" is relative, so run from the repo root
+
+from sktime_mcp.tools.data_tools import load_data_source_tool, release_data_handle_tool
+from sktime_mcp.tools.evaluate import evaluate_estimator_tool
+from sktime_mcp.tools.fit_predict import fit_predict_tool
+from sktime_mcp.tools.instantiate import instantiate_estimator_tool
+
+
+def print_step(title: str) -> None:
+    """Print ``title`` as a section banner (``== title ==``) preceded by a blank line."""
+    print(f"\n== {title} ==")
+
+
+def main() -> None:
+    """Walk the workflow: load in-memory data, fit/predict, evaluate, release the data handle."""
+    print("External Agent Integration Workflow")
+    print("Use editable install first: pip install -e .")
+    print("Fallback during source-only integration: PYTHONPATH=/path/to/sktime-mcp/src")
+
+    print_step("1. Load custom data and inspect metadata")
+    load_result = load_data_source_tool(  # in-memory source: 24 daily rows
+        {
+            "type": "pandas",
+            "data": {
+                "date": pd.date_range(start="2021-01-01", periods=24, freq="D"),
+                "sales": [100 + i for i in range(24)],
+                "promo": [0, 1] * 12,
+            },
+            "time_column": "date",
+            "target_column": "sales",
+            "exog_columns": ["promo"],
+        }
+    )
+    assert load_result["success"], load_result  # fail fast, showing the full tool response
+    data_handle = load_result["data_handle"]
+    metadata = load_result["metadata"]
+    print(f"Loaded handle: {data_handle}")
+    print(
+        "Metadata summary: "
+        f"rows={metadata.get('rows')}, "
+        f"frequency={metadata.get('frequency')}, "
+        f"columns={metadata.get('columns')}, "
+        f"exog_columns={metadata.get('exog_columns', [])}"
+    )
+
+    print_step("2. Fit and predict from a custom data handle")
+    inst_result = instantiate_estimator_tool("NaiveForecaster", {"strategy": "last"})
+    assert inst_result["success"], inst_result
+    fit_result = fit_predict_tool(
+        inst_result["handle"],
+        dataset="",  # NOTE(review): presumably ignored when data_handle is given — confirm
+        horizon=3,
+        data_handle=data_handle,
+    )
+    assert fit_result["success"], fit_result
+    print(f"Custom-data forecast horizon: {fit_result['horizon']}")
+
+    print_step("3. Evaluate on a demo dataset using current MCP contract")
+    eval_result = evaluate_estimator_tool(inst_result["handle"], dataset="airline", cv_folds=3)
+    assert eval_result["success"], eval_result
+    first_row = eval_result["results"][0]  # indexing [0] assumes at least one result row
+    metric_keys = sorted(k for k in first_row if k.startswith("test_"))  # metric columns only
+    print(f"Evaluation folds requested: {eval_result['cv_folds_requested']}")
+    print(f"Returned metric keys: {', '.join(metric_keys[:3])}")
+
+    cleanup = release_data_handle_tool(data_handle)  # reached only if every step above passed
+    assert cleanup["success"], cleanup
+    print("\nExternal agent workflow complete!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_external_agent_integration_example.py b/tests/test_external_agent_integration_example.py
@@ -0,0 +1,29 @@
+"""Smoke test for the external agent integration example."""
+
+import sys
+from pathlib import Path
+from subprocess import run
+
+
+def test_external_agent_integration_example_runs():
+    """The example should run and print key integration guidance."""
+    repo_root = Path(__file__).resolve().parents[1]
+    script = repo_root / "examples" / "08_external_agent_integration.py"
+
+    result = run(
+        [sys.executable, str(script)],
+        cwd=repo_root,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+
+    stdout = result.stdout
+    assert "External Agent Integration Workflow" in stdout
+    assert "Use editable install first: pip install -e ." in stdout
+    assert "Fallback during source-only integration: PYTHONPATH=/path/to/sktime-mcp/src" in stdout
+    assert "Metadata summary:" in stdout
+    assert "Custom-data forecast horizon: 3" in stdout
+    assert "Evaluation folds requested: 3" in stdout
+    assert "Returned metric keys:" in stdout
+    assert "External agent workflow complete!" in stdout