From ab542ed5293fb5bbcaa604ecd9333bb7f302ec58 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 18:44:01 -0300 Subject: [PATCH 01/17] add agent skills from pydantic-ai-skills --- docs/api/toolsets.md | 16 + docs/skills.md | 535 +++++++++++ .../skills/arxiv-search/SKILL.md | 75 ++ .../arxiv-search/scripts/arxiv_search.py | 63 ++ .../skills/pydanticai-docs/SKILL.md | 131 +++ examples/pydantic_ai_examples/skills_agent.py | 43 + mkdocs.yml | 1 + pydantic_ai_slim/pydantic_ai/__init__.py | 2 + .../pydantic_ai/toolsets/__init__.py | 27 + .../pydantic_ai/toolsets/skills/__init__.py | 64 ++ .../pydantic_ai/toolsets/skills/_discovery.py | 307 ++++++ .../toolsets/skills/_exceptions.py | 21 + .../pydantic_ai/toolsets/skills/_toolset.py | 424 +++++++++ .../pydantic_ai/toolsets/skills/_types.py | 90 ++ pydantic_ai_slim/pyproject.toml | 1 + tests/test_skills.py | 900 ++++++++++++++++++ uv.lock | 2 + 17 files changed, 2702 insertions(+) create mode 100644 docs/skills.md create mode 100644 examples/pydantic_ai_examples/skills/arxiv-search/SKILL.md create mode 100644 examples/pydantic_ai_examples/skills/arxiv-search/scripts/arxiv_search.py create mode 100644 examples/pydantic_ai_examples/skills/pydanticai-docs/SKILL.md create mode 100644 examples/pydantic_ai_examples/skills_agent.py create mode 100644 pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py create mode 100644 pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py create mode 100644 pydantic_ai_slim/pydantic_ai/toolsets/skills/_exceptions.py create mode 100644 pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py create mode 100644 pydantic_ai_slim/pydantic_ai/toolsets/skills/_types.py create mode 100644 tests/test_skills.py diff --git a/docs/api/toolsets.md b/docs/api/toolsets.md index 2b0a22881b..d1d361c0f4 100644 --- a/docs/api/toolsets.md +++ b/docs/api/toolsets.md @@ -12,7 +12,23 @@ - PrefixedToolset - RenamedToolset - PreparedToolset + - SkillsToolset - WrapperToolset - ToolsetFunc +::: pydantic_ai.toolsets.skills + options: + members: + - Skill + - SkillMetadata + - SkillResource + - SkillScript + - discover_skills + - parse_skill_md + - SkillException + - SkillNotFoundError + - SkillResourceLoadError + - SkillScriptExecutionError + - SkillValidationError + ::: pydantic_ai.toolsets.fastmcp diff --git a/docs/skills.md b/docs/skills.md new file mode 100644 index 0000000000..837b0a1150 --- /dev/null +++ b/docs/skills.md @@ -0,0 +1,535 @@ +# Skills + +A standardized, composable framework for building and managing Agent Skills. Skills are modular collections of instructions, scripts, tools, and resources that enable AI agents to progressively discover, load, and execute specialized capabilities for domain-specific tasks. + +## What are Agent Skills? + +Agent Skills are **modular packages** that extend your agent's capabilities without hardcoding every possible feature into your agent's instructions. Think of them as plugins that agents can discover and load on-demand. 
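+
+In practice, a skill is just a directory containing a `SKILL.md` file (plus optional scripts and resources). A minimal, illustrative layout looks like this:
+
+```
+skills/
+└── hello-world/
+    └── SKILL.md
+```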
+ +Key benefits: + +- **🔍 Progressive Discovery**: Agents list available skills and load only what they need +- **📦 Modular Design**: Each skill is a self-contained directory with instructions and resources +- **🛠️ Script Execution**: Skills can include executable Python scripts +- **📚 Resource Management**: Support for additional documentation and data files +- **🚀 Easy Integration**: Simple toolset interface that works with any Pydantic AI agent + +## Quick Example + +```python +from pydantic_ai import Agent, SkillsToolset + +# Initialize Skills Toolset with skill directories +skills_toolset = SkillsToolset(directories=["./skills"]) + +# Create agent with skills +agent = Agent( + model='openai:gpt-4o', + instructions="You are a helpful research assistant.", + toolsets=[skills_toolset] +) + +# Add skills system prompt +@agent.system_prompt +async def add_skills_to_system_prompt() -> str: + return skills_toolset.get_skills_system_prompt() + +# Use agent - skills tools are automatically available +result = await agent.run( + "What are the last 3 papers on arXiv about machine learning?" +) +print(result.output) +``` + +!!! note "Alternative Import" + You can also import `SkillsToolset` from `pydantic_ai.toolsets`: + ```python + from pydantic_ai.toolsets import SkillsToolset + ``` + +## How It Works + +1. **Discovery**: The toolset scans specified directories for skills (folders with `SKILL.md` files) +2. **Registration**: Skills are registered as tools on your agent +3. **Progressive Loading**: Agents can: + - List all available skills with `list_skills()` (optional, as skills are in system prompt) + - Load detailed instructions with `load_skill(name)` + - Read additional resources with `read_skill_resource(skill_name, resource_name)` + - Execute scripts with `run_skill_script(skill_name, script_name, args)` + +## Creating Skills + +### Basic Skill Structure + +Every skill must have at minimum a `SKILL.md` file: + +```markdown +my-skill/ +├── SKILL.md # Required: Instructions and metadata +├── scripts/ # Optional: Executable scripts +│ └── my_script.py +└── resources/ # Optional: Additional files +├── reference.md +└── data.json +``` + +### SKILL.md Format + +The `SKILL.md` file uses **YAML frontmatter** for metadata and **Markdown** for instructions: + +```markdown +--- +name: arxiv-search +description: Search arXiv for research papers +version: 1.0.0 +author: Your Name +tags: [papers, arxiv, academic] +--- + +# arXiv Search Skill + +## When to Use + +Use this skill when you need to: + +- Find recent preprints in physics, math, or computer science +- Search for papers not yet published in journals +- Access cutting-edge research + +## Instructions + +To search arXiv, use the `run_skill_script` tool with: + +1. **skill_name**: "arxiv-search" +2. **script_name**: "arxiv_search" +3. 
**args**: Your search query and options + +## Example +``` + +run_skill_script( +skill_name="arxiv-search", +script_name="arxiv_search", +args=["machine learning", "--max-papers", "5"] +) + +``` + +``` + +### Required Fields + +- `name`: Unique identifier (lowercase letters, numbers, and hyphens only) +- `description`: Brief summary (appears in skill listings, max 1024 characters) + +### Naming Conventions + +Following Anthropic's skill naming conventions: + +| Requirement | Example | +| ------------------ | -------------------------------------- | +| Lowercase only | `arxiv-search` ✅, `ArxivSearch` ❌ | +| Hyphens for spaces | `web-research` ✅, `web_research` ❌ | +| Max 64 characters | `data-analyzer` ✅ | +| No reserved words | Avoid "anthropic" or "claude" in names | + +## Progressive Disclosure + +The toolset implements **progressive disclosure** - exposing information only when needed: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ System Prompt (via get_skills_system_prompt()) │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ Available Skills: │ │ +│ │ - arxiv-search: Search arXiv for research papers │ │ +│ │ - web-research: Research topics on the web │ │ +│ │ - data-analyzer: Analyze CSV and JSON files │ │ +│ └───────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ + Agent sees skill names & descriptions + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ load_skill("arxiv-search") │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ Returns full SKILL.md instructions: │ │ +│ │ - When to use │ │ +│ │ - Step-by-step guide │ │ +│ │ - Example invocations │ │ +│ │ - Available resources and scripts │ │ +│ └───────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ + Agent loads detailed instructions when needed +``` + +This approach: + +- **Reduces initial context size** - Only metadata is in the system prompt +- **Lets agents discover capabilities dynamically** - Load what's needed +- **Improves token efficiency** - Don't pay for unused instructions +- **Scales to many skills** - Add hundreds of skills without bloating prompts + +## The Four Tools + +The `SkillsToolset` provides four tools to agents: + +### 1. `list_skills()` + +Lists all available skills with their descriptions. + +**Returns**: Formatted markdown with skill names and descriptions + +**When to use**: Optional - skills are already listed in the system prompt via `get_skills_system_prompt()`. Use only if the agent needs to re-check available skills dynamically. + +### 2. `load_skill(skill_name)` + +Loads the complete instructions for a specific skill. + +**Parameters**: + +- `skill_name` (str) - Name of the skill to load + +**Returns**: Full SKILL.md content including detailed instructions, available resources, and scripts + +**When to use**: When the agent needs detailed instructions for using a skill + +### 3. `read_skill_resource(skill_name, resource_name)` + +Reads additional resource files from a skill. + +**Parameters**: + +- `skill_name` (str) - Name of the skill +- `resource_name` (str) - Resource filename (e.g., "FORMS.md") + +**Returns**: Content of the resource file + +**When to use**: When a skill references additional documentation or data files + +### 4. `run_skill_script(skill_name, script_name, args)` + +Executes a Python script from a skill. 
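+
+For example, with the arXiv skill shown later on this page, an agent would invoke it roughly like this (the query and flags are illustrative):
+
+```python
+run_skill_script(
+    skill_name="arxiv-search",
+    script_name="arxiv_search",
+    args=["large language models", "--max-papers", "3"],
+)
+```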
+ +**Parameters**: + +- `skill_name` (str) - Name of the skill +- `script_name` (str) - Script name without .py extension +- `args` (list[str], optional) - Command-line arguments + +**Returns**: Script output (stdout and stderr combined) + +**When to use**: When a skill needs to execute custom code + +## Adding Scripts to Skills + +Scripts enable skills to perform custom operations that aren't available as standard agent tools. + +### Script Location + +Place scripts in either: + +- `scripts/` subdirectory (recommended) +- Directly in the skill folder + +``` +my-skill/ +├── SKILL.md +└── scripts/ + ├── process_data.py + └── fetch_info.py +``` + +### Writing Scripts + +Scripts should: + +- Accept command-line arguments via `sys.argv` +- Print output to stdout +- Exit with code 0 on success, non-zero on error +- Handle errors gracefully + +```python +#!/usr/bin/env python3 +"""Example skill script.""" + +import sys +import json + +def main(): + if len(sys.argv) < 2: + print("Usage: process_data.py ") + sys.exit(1) + + input_data = sys.argv[1] + + try: + # Process the input + result = {"processed": input_data.upper()} + print(json.dumps(result, indent=2)) + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + +if __name__ == "__main__": + main() +``` + +## SkillsToolset API + +### Initialization + +```python +from pydantic_ai.toolsets import SkillsToolset + +toolset = SkillsToolset( + directories=["./skills", "./shared-skills"], + auto_discover=True, # Auto-discover skills on init (default: True) + validate=True, # Validate skill structure (default: True) + toolset_id="skills", # Unique identifier (default: "skills") + script_timeout=30, # Script execution timeout in seconds (default: 30) + python_executable=None, # Python executable path (default: sys.executable) +) +``` + +### Key Methods + +| Method | Description | +| ---------------------------- | ---------------------------------------------- | +| `get_skills_system_prompt()` | Get system prompt text with all skill metadata | +| `get_skill(name)` | Get a specific skill object by name | +| `refresh()` | Re-scan directories for skills | + +### Properties + +| Property | Description | +| -------- | ------------------------------------------------ | +| `skills` | Dictionary of loaded skills (`dict[str, Skill]`) | + +## Skill Discovery + +Skills can be discovered programmatically: + +```python +from pydantic_ai.toolsets import discover_skills + +skills = discover_skills( + directories=["./skills"], + validate=True +) + +for skill in skills: + print(f"{skill.name}: {skill.metadata.description}") + print(f" Resources: {[r.name for r in skill.resources]}") + print(f" Scripts: {[s.name for s in skill.scripts]}") +``` + +## Type Reference + +### Skill + +```python +from pydantic_ai import SkillsToolset +from pydantic_ai.toolsets.skills import Skill + +skill = Skill( + name="my-skill", + path=Path("./skills/my-skill"), + metadata=SkillMetadata(...), + content="# Instructions...", + resources=[SkillResource(...)], + scripts=[SkillScript(...)], +) +``` + +### SkillMetadata + +```python +from pydantic_ai.toolsets.skills import SkillMetadata + +metadata = SkillMetadata( + name="my-skill", + description="My skill description", + extra={"version": "1.0.0", "author": "Me"} +) +``` + +### SkillResource + +```python +from pydantic_ai.toolsets.skills import SkillResource + +resource = SkillResource( + name="FORMS.md", + path=Path("./skills/my-skill/FORMS.md"), + content=None, # Lazy-loaded +) +``` + +### SkillScript + +```python 
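+# SkillScript records an executable discovered for a skill; in normal use these
+# objects are produced by discover_skills() rather than constructed by hand.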
+from pydantic_ai.toolsets.skills import SkillScript + +script = SkillScript( + name="process_data", + path=Path("./skills/my-skill/scripts/process_data.py"), + skill_name="my-skill", +) +``` + +## Security Considerations + +!!! warning "Use Skills from Trusted Sources Only" + + Skills provide AI agents with new capabilities through instructions and code. While this makes them powerful, it also means a malicious skill can direct agents to invoke tools or execute code in ways that don't match the skill's stated purpose. + + If you must use a skill from an untrusted or unknown source, exercise extreme caution and thoroughly audit it before use. Depending on what access agents have when executing the skill, malicious skills could lead to data exfiltration, unauthorized system access, or other security risks. + +The toolset includes security measures: + +- **Path traversal prevention**: Resources and scripts are validated to stay within the skill directory +- **Script timeout**: Scripts have a configurable timeout (default: 30 seconds) +- **Sandboxed execution**: Scripts run in a subprocess with limited access + +## Complete Example + +Here's a complete example with a skill that searches for research papers: + +### Skill Structure + +``` +skills/ +└── arxiv-search/ + ├── SKILL.md + └── scripts/ + └── arxiv_search.py +``` + +### SKILL.md + +```markdown +--- +name: arxiv-search +description: Search arXiv for research papers by query +--- + +# arXiv Search + +Search the arXiv preprint server for academic papers. + +## Usage + +Use `run_skill_script` with: + +- **script_name**: "arxiv_search" +- **args**: ["your search query", "--max-papers", "5"] + +## Example + +To find papers about transformers: +``` + +run_skill_script("arxiv-search", "arxiv_search", ["transformers attention mechanism", "--max-papers", "3"]) + +``` + +``` + +### arxiv_search.py + +```python +#!/usr/bin/env python3 +"""Search arXiv for papers.""" + +import argparse +import urllib.request +import urllib.parse +import xml.etree.ElementTree as ET + +def search_arxiv(query: str, max_results: int = 5) -> list[dict]: + """Search arXiv API.""" + base_url = "http://export.arxiv.org/api/query" + params = { + "search_query": f"all:{query}", + "start": 0, + "max_results": max_results, + "sortBy": "submittedDate", + "sortOrder": "descending", + } + url = f"{base_url}?{urllib.parse.urlencode(params)}" + + with urllib.request.urlopen(url) as response: + data = response.read() + + root = ET.fromstring(data) + ns = {"atom": "http://www.w3.org/2005/Atom"} + + results = [] + for entry in root.findall("atom:entry", ns): + title = entry.find("atom:title", ns).text.strip() + summary = entry.find("atom:summary", ns).text.strip()[:200] + link = entry.find("atom:id", ns).text + results.append({"title": title, "summary": summary, "link": link}) + + return results + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("query", help="Search query") + parser.add_argument("--max-papers", type=int, default=5) + args = parser.parse_args() + + results = search_arxiv(args.query, args.max_papers) + + for i, paper in enumerate(results, 1): + print(f"{i}. 
{paper['title']}") + print(f" {paper['summary']}...") + print(f" Link: {paper['link']}") + print() + +if __name__ == "__main__": + main() +``` + +### Agent Code + +```python +import asyncio +from pydantic_ai import Agent, SkillsToolset + +async def main(): + skills_toolset = SkillsToolset(directories=["./skills"]) + + agent = Agent( + model='openai:gpt-4o', + instructions="You are a research assistant.", + toolsets=[skills_toolset] + ) + + @agent.system_prompt + async def add_skills(): + return skills_toolset.get_skills_system_prompt() + + result = await agent.run( + "Find the 3 most recent papers about large language models" + ) + print(result.output) + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## References + +This implementation is inspired by: + +- [langchain-ai/deepagents](https://github.com/langchain-ai/deepagents/tree/master) +- [vstorm-co/pydantic-deepagents](https://github.com/vstorm-co/pydantic-deepagents/tree/main) +- [Introducing Agent Skills | Anthropic](https://www.anthropic.com/news/agent-skills) +- [Using skills with Deep Agents | LangChain](https://blog.langchain.com/using-skills-with-deep-agents/) diff --git a/examples/pydantic_ai_examples/skills/arxiv-search/SKILL.md b/examples/pydantic_ai_examples/skills/arxiv-search/SKILL.md new file mode 100644 index 0000000000..0738d7aa01 --- /dev/null +++ b/examples/pydantic_ai_examples/skills/arxiv-search/SKILL.md @@ -0,0 +1,75 @@ +--- +name: arxiv-search +description: Search arXiv preprint repository for papers in physics, mathematics, computer science, quantitative biology, and related fields. +--- + +# arXiv Search Skill + +This skill provides access to arXiv, a free distribution service and open-access archive for scholarly articles in physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering, systems science, and economics. + +## When to Use This Skill + +Use this skill when you need to: + +- Find preprints and recent research papers before journal publication +- Search for papers in computational biology, bioinformatics, or systems biology +- Access mathematical or statistical methods papers relevant to biology +- Find machine learning papers applied to biological problems +- Get the latest research that may not yet be in PubMed + +## Skill Scripts + +### arxiv_search + +The `arxiv_search` script accepts the following arguments: + +- First argument (required): Search query string (e.g., "neural networks protein structure", "single cell RNA-seq") +- `--max-papers` (optional): Maximum number of papers to retrieve (default: 10) + +### Usage Pattern + +Use the `run_skill_script` tool to execute the `arxiv_search` script. For example: + +```python +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["your search query", "--max-papers", "5"] +) +``` + +Search for computational biology papers (default 10 results): + +```python +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["protein folding prediction"] +) +``` + +Search for machine learning papers with limited results: + +```python +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["transformer attention mechanism", "--max-papers", "3"] +) +``` + +## Output Format + +The script returns formatted results with: + +- Paper title +- Summary/abstract +- arXiv URL + +## Dependencies + +This script requires the `arxiv` package. 
Install with: + +```bash +pip install arxiv +``` diff --git a/examples/pydantic_ai_examples/skills/arxiv-search/scripts/arxiv_search.py b/examples/pydantic_ai_examples/skills/arxiv-search/scripts/arxiv_search.py new file mode 100644 index 0000000000..1225e70665 --- /dev/null +++ b/examples/pydantic_ai_examples/skills/arxiv-search/scripts/arxiv_search.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""arXiv Search. + +Searches the arXiv preprint repository for research papers. +""" + +import argparse + + +def query_arxiv(query: str, max_papers: int = 10) -> str: + """Query arXiv for papers based on the provided search query. + + Parameters + ---------- + query : str + The search query string. + max_papers : int + The maximum number of papers to retrieve (default: 10). + + Returns: + The formatted search results or an error message. + """ + try: + import arxiv # pyright: ignore[reportMissingImports] + except ImportError: + return 'Error: arxiv package not installed. Install with: pip install arxiv' + + try: + client = arxiv.Client() # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + search = arxiv.Search( # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + query=query, + max_results=max_papers, + sort_by=arxiv.SortCriterion.Relevance, # pyright: ignore[reportUnknownMemberType] + ) + results = '\n\n'.join( + [ + f'Title: {paper.title}\nSummary: {paper.summary}\nURL: {paper.entry_id}' # pyright: ignore[reportUnknownMemberType] + for paper in client.results(search) # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + ] + ) + return results if results else 'No papers found on arXiv.' + except Exception as e: + return f'Error querying arXiv: {e}' + + +def main() -> None: + """Main function to parse arguments and perform arXiv search.""" + parser = argparse.ArgumentParser(description='Search arXiv for research papers') + parser.add_argument('query', type=str, help='Search query string') + parser.add_argument( + '--max-papers', + type=int, + default=10, + help='Maximum number of papers to retrieve (default: 10)', + ) + + args = parser.parse_args() + + print(query_arxiv(args.query, max_papers=args.max_papers)) + + +if __name__ == '__main__': + main() diff --git a/examples/pydantic_ai_examples/skills/pydanticai-docs/SKILL.md b/examples/pydantic_ai_examples/skills/pydanticai-docs/SKILL.md new file mode 100644 index 0000000000..74ea82ee66 --- /dev/null +++ b/examples/pydantic_ai_examples/skills/pydanticai-docs/SKILL.md @@ -0,0 +1,131 @@ +--- +name: pydanticai-docs +description: Use this skill for requests related to Pydantic AI framework - building agents, tools, dependencies, structured outputs, and model integrations. +--- + +# Pydantic AI Documentation Skill + +## Overview + +This skill provides guidance for using **Pydantic AI** - a Python agent framework for building production-grade Generative AI applications. Pydantic AI emphasizes type safety, dependency injection, and structured outputs. + +## Key Concepts + +### Agents + +Agents are the primary interface for interacting with LLMs. 
They contain: + +- **Instructions**: System prompts for the LLM +- **Tools**: Functions the LLM can call +- **Output Type**: Structured datatype the LLM must return +- **Dependencies**: Data/services injected into tools and prompts + +### Models + +Supported models include: + +- OpenAI: `openai:gpt-4o`, `openai:gpt-5` +- Anthropic: `anthropic:claude-sonnet-4-5` +- Google: `google:gemini-2.0-flash` +- Groq, Azure, Together AI, DeepSeek, Grok, and more + +### Tools + +Two types of tools: + +- `@agent.tool`: Receives `RunContext` with dependencies +- `@agent.tool_plain`: Plain function without context + +### Toolsets + +Collections of tools that can be registered with agents: + +- `FunctionToolset`: Group multiple tools +- `SkillsToolset`: Progressive skill discovery +- `MCPServerTool`: Model Context Protocol servers +- Third-party toolsets (ACI.dev, etc.) + +## Instructions + +### 1. Fetch Full Documentation + +For the most accurate and up-to-date information, always fetch the full documentation: + +``` +https://ai.pydantic.dev/llms-full.txt +``` + +### 2. Quick Examples + +**Basic Agent:** + +```python +from pydantic_ai import Agent + +agent = Agent('openai:gpt-4o', instructions='You are a helpful assistant.') +result = agent.run_sync('Hello!') +print(result.output) +``` + +**With Structured Output:** + +```python +from pydantic import BaseModel +from pydantic_ai import Agent + +class CityInfo(BaseModel): + name: str + country: str + population: int + +agent = Agent('openai:gpt-4o', output_type=CityInfo) +result = agent.run_sync('Tell me about Paris') +print(result.output) # CityInfo(name='Paris', country='France', population=...) +``` + +**With Tools:** + +```python +from pydantic_ai import Agent, RunContext + +agent = Agent('openai:gpt-4o') + +@agent.tool +async def get_weather(ctx: RunContext[str], city: str) -> str: + # Your implementation + return f"Weather in {city}: Sunny, 22°C" + +result = await agent.run('What is the weather in London?') +``` + +**With Dependencies:** + +```python +from dataclasses import dataclass +from pydantic_ai import Agent, RunContext + +@dataclass +class AppDeps: + api_key: str + user_id: str + +agent = Agent('openai:gpt-4o', deps_type=AppDeps) + +@agent.tool +async def get_user_data(ctx: RunContext[AppDeps]) -> str: + return f"User: {ctx.deps.user_id}" + +result = await agent.run('Get my data', deps=AppDeps(api_key='...', user_id='123')) +``` + +## When to Use This Skill + +Use this skill when the user asks about: + +- How to build agents with Pydantic AI +- Tool definitions and toolsets +- Dependency injection patterns +- Structured outputs with Pydantic models +- Model configuration and providers +- Streaming responses +- Testing agents diff --git a/examples/pydantic_ai_examples/skills_agent.py b/examples/pydantic_ai_examples/skills_agent.py new file mode 100644 index 0000000000..f0fdf784ea --- /dev/null +++ b/examples/pydantic_ai_examples/skills_agent.py @@ -0,0 +1,43 @@ +"""Skills integration example demonstrating progressive skill discovery with Pydantic AI. 
+ +This example shows how to create an agent with skills that can: +- List available skills +- Load detailed skill instructions on demand +- Read additional resources +- Execute skill scripts +""" + +import asyncio +from pathlib import Path + +from pydantic_ai import Agent +from pydantic_ai.toolsets import SkillsToolset + + +async def main() -> None: + """Pydantic AI with Agent Skills.""" + # Get the skills directory (examples/pydantic_ai_examples/skills) + skills_dir = Path(__file__).parent / 'skills' + + # Initialize Skills Toolset + skills_toolset = SkillsToolset(directories=[skills_dir]) + + # Create agent with skills + agent = Agent( + model='openai:gpt-4o', + instructions='You are a helpful research assistant.', + toolsets=[skills_toolset], + ) + + # Add skills system prompt (includes skill descriptions and usage instructions) + @agent.system_prompt + async def add_skills_prompt() -> str: # pyright: ignore[reportUnusedFunction] + return skills_toolset.get_skills_system_prompt() + + # Use agent - skills tools are available for the agent to call + result = await agent.run('What are the main features of Pydantic AI framework?') + print(f'\nResponse:\n{result.output}') + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/mkdocs.yml b/mkdocs.yml index a1c944da65..6c98ce6a4d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,6 +41,7 @@ nav: - tools.md - tools-advanced.md - toolsets.md + - skills.md - deferred-tools.md - builtin-tools.md - common-tools.md diff --git a/pydantic_ai_slim/pydantic_ai/__init__.py b/pydantic_ai_slim/pydantic_ai/__init__.py index dfecd6288f..a483f4b804 100644 --- a/pydantic_ai_slim/pydantic_ai/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/__init__.py @@ -108,6 +108,7 @@ PrefixedToolset, PreparedToolset, RenamedToolset, + SkillsToolset, ToolsetFunc, ToolsetTool, WrapperToolset, @@ -212,6 +213,7 @@ 'PrefixedToolset', 'PreparedToolset', 'RenamedToolset', + 'SkillsToolset', 'ToolsetFunc', 'ToolsetTool', 'WrapperToolset', diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/__init__.py b/pydantic_ai_slim/pydantic_ai/toolsets/__init__.py index a5228ca91a..67a717c1ee 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/__init__.py @@ -8,6 +8,20 @@ from .prefixed import PrefixedToolset from .prepared import PreparedToolset from .renamed import RenamedToolset +from .skills import ( + Skill, + SkillException, + SkillMetadata, + SkillNotFoundError, + SkillResource, + SkillResourceLoadError, + SkillScript, + SkillScriptExecutionError, + SkillsToolset, + SkillValidationError, + discover_skills, + parse_skill_md, +) from .wrapper import WrapperToolset __all__ = ( @@ -24,4 +38,17 @@ 'PreparedToolset', 'WrapperToolset', 'ApprovalRequiredToolset', + # Skills toolset + 'SkillsToolset', + 'Skill', + 'SkillMetadata', + 'SkillResource', + 'SkillScript', + 'discover_skills', + 'parse_skill_md', + 'SkillException', + 'SkillNotFoundError', + 'SkillResourceLoadError', + 'SkillScriptExecutionError', + 'SkillValidationError', ) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py new file mode 100644 index 0000000000..76860b5bff --- /dev/null +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py @@ -0,0 +1,64 @@ +"""Skills toolset for Pydantic AI. + +This module provides a standardized, composable framework for building and managing +Agent Skills within the Pydantic AI ecosystem. 
Agent Skills are modular collections +of instructions, scripts, tools, and resources that enable AI agents to progressively +discover, load, and execute specialized capabilities for domain-specific tasks. + +Example: + ```python + from pydantic_ai import Agent, SkillsToolset + + # Initialize Skills Toolset with one or more skill directories + skills_toolset = SkillsToolset(directories=["./skills"]) + + # Create agent with skills as a toolset + agent = Agent( + model='openai:gpt-4o', + instructions="You are a helpful research assistant.", + toolsets=[skills_toolset] + ) + + # Add skills system prompt to agent + @agent.system_prompt + def add_skills_to_system_prompt() -> str: + return skills_toolset.get_skills_system_prompt() + + # Use agent - skills tools are available for the agent to call + result = await agent.run( + "What are the last 3 papers on arXiv about machine learning?" + ) + print(result.output) + ``` +""" + +from pydantic_ai.toolsets.skills._discovery import discover_skills, parse_skill_md, validate_skill_metadata +from pydantic_ai.toolsets.skills._exceptions import ( + SkillException, + SkillNotFoundError, + SkillResourceLoadError, + SkillScriptExecutionError, + SkillValidationError, +) +from pydantic_ai.toolsets.skills._toolset import SkillsToolset +from pydantic_ai.toolsets.skills._types import Skill, SkillMetadata, SkillResource, SkillScript + +__all__ = ( + # Main toolset + 'SkillsToolset', + # Types + 'Skill', + 'SkillMetadata', + 'SkillResource', + 'SkillScript', + # Discovery + 'discover_skills', + 'parse_skill_md', + 'validate_skill_metadata', + # Exceptions + 'SkillException', + 'SkillNotFoundError', + 'SkillResourceLoadError', + 'SkillScriptExecutionError', + 'SkillValidationError', +) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py new file mode 100644 index 0000000000..5c4919ac07 --- /dev/null +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py @@ -0,0 +1,307 @@ +"""Skill discovery and parsing utilities. + +This module provides functions for discovering skills from filesystem directories +and parsing SKILL.md files with YAML frontmatter. +""" + +from __future__ import annotations + +import logging +import re +from collections.abc import Sequence +from pathlib import Path +from typing import Any + +import yaml + +from ._exceptions import SkillValidationError +from ._types import ( + Skill, + SkillMetadata, + SkillResource, + SkillScript, +) + +logger = logging.getLogger('pydantic_ai.skills') + +# Anthropic's naming convention: lowercase letters, numbers, and hyphens only +SKILL_NAME_PATTERN = re.compile(r'^[a-z0-9-]+$') +RESERVED_WORDS = {'anthropic', 'claude'} + + +def validate_skill_metadata( + frontmatter: dict[str, Any], + instructions: str, +) -> list[str]: + """Validate skill metadata against Anthropic's requirements. + + Args: + frontmatter: Parsed YAML frontmatter. + instructions: The skill instructions content. + + Returns: + List of validation warnings (empty if no issues). 
+ """ + warnings_list: list[str] = [] + + name = frontmatter.get('name', '') + description = frontmatter.get('description', '') + + # Validate name format + if name: + # Check length first to prevent regex on excessively long strings + if len(name) > 64: + warnings_list.append(f"Skill name '{name}' exceeds 64 characters ({len(name)} chars)") + # Only run regex if name is reasonable length (defense in depth) + elif not SKILL_NAME_PATTERN.match(name): + warnings_list.append(f"Skill name '{name}' should contain only lowercase letters, numbers, and hyphens") + # Check for reserved words + for reserved in RESERVED_WORDS: + if reserved in name: + warnings_list.append(f"Skill name '{name}' contains reserved word '{reserved}'") + + # Validate description + if description and len(description) > 1024: + warnings_list.append(f'Skill description exceeds 1024 characters ({len(description)} chars)') + + # Validate instructions length (Anthropic recommends under 500 lines) + lines = instructions.split('\n') + if len(lines) > 500: + warnings_list.append( + f'SKILL.md body exceeds recommended 500 lines ({len(lines)} lines). ' + f'Consider splitting into separate resource files.' + ) + + return warnings_list + + +def parse_skill_md(content: str) -> tuple[dict[str, Any], str]: + """Parse a SKILL.md file into frontmatter and instructions. + + Uses PyYAML for robust YAML parsing. + + Args: + content: Full content of the SKILL.md file. + + Returns: + Tuple of (frontmatter_dict, instructions_markdown). + + Raises: + SkillValidationError: If YAML parsing fails. + """ + # Match YAML frontmatter between --- delimiters + frontmatter_pattern = r'^---\s*\n(.*?)^---\s*\n' + match = re.search(frontmatter_pattern, content, re.DOTALL | re.MULTILINE) + + if not match: + # No frontmatter, treat entire content as instructions + return {}, content.strip() + + frontmatter_yaml = match.group(1).strip() + instructions = content[match.end() :].strip() + + # Handle empty frontmatter + if not frontmatter_yaml: + return {}, instructions + + try: + frontmatter_data = yaml.safe_load(frontmatter_yaml) + if frontmatter_data is None: + frontmatter: dict[str, Any] = {} + elif isinstance(frontmatter_data, dict): + frontmatter = frontmatter_data + else: + frontmatter = {} + except yaml.YAMLError as e: + raise SkillValidationError(f'Failed to parse YAML frontmatter: {e}') from e + + return frontmatter, instructions + + +def _discover_resources(skill_folder: Path) -> list[SkillResource]: + """Discover resource files in a skill folder. + + Resources are markdown files other than SKILL.md, plus any files + in a resources/ subdirectory. + + Args: + skill_folder: Path to the skill directory. + + Returns: + List of discovered SkillResource objects. + """ + resources: list[SkillResource] = [] + + # Find .md files other than SKILL.md (FORMS.md, REFERENCE.md, etc.) 
+ for md_file in skill_folder.glob('*.md'): + if md_file.name.upper() != 'SKILL.MD': + resources.append( + SkillResource( + name=md_file.name, + path=md_file.resolve(), + ) + ) + + # Find files in resources/ subdirectory if it exists + resources_dir = skill_folder / 'resources' + if resources_dir.exists() and resources_dir.is_dir(): + for resource_file in resources_dir.rglob('*'): + if resource_file.is_file(): + rel_path = resource_file.relative_to(skill_folder) + resources.append( + SkillResource( + name=str(rel_path), + path=resource_file.resolve(), + ) + ) + + return resources + + +def _discover_scripts(skill_folder: Path, skill_name: str) -> list[SkillScript]: + """Discover executable scripts in a skill folder. + + Looks for Python scripts in: + - Directly in the skill folder (*.py) + - In a scripts/ subdirectory + + Args: + skill_folder: Path to the skill directory. + skill_name: Name of the parent skill. + + Returns: + List of discovered SkillScript objects. + """ + scripts: list[SkillScript] = [] + + # Find .py files in skill folder root (excluding __init__.py) + for py_file in skill_folder.glob('*.py'): + if py_file.name != '__init__.py': + scripts.append( + SkillScript( + name=py_file.stem, # filename without .py + path=py_file.resolve(), + skill_name=skill_name, + ) + ) + + # Find .py files in scripts/ subdirectory + scripts_dir = skill_folder / 'scripts' + if scripts_dir.exists() and scripts_dir.is_dir(): + for py_file in scripts_dir.glob('*.py'): + if py_file.name != '__init__.py': + scripts.append( + SkillScript( + name=py_file.stem, + path=py_file.resolve(), + skill_name=skill_name, + ) + ) + + return scripts + + +def discover_skills( + directories: Sequence[str | Path], + validate: bool = True, +) -> list[Skill]: + """Discover skills from filesystem directories. + + Searches for SKILL.md files in the given directories and loads + skill metadata and structure. + + Args: + directories: List of directory paths to search for skills. + validate: Whether to validate skill structure (requires name and description). + + Returns: + List of discovered Skill objects. + + Raises: + SkillValidationError: If validation is enabled and a skill is invalid. 
+ """ + skills: list[Skill] = [] + + for skill_dir in directories: + dir_path = Path(skill_dir).expanduser().resolve() + + if not dir_path.exists(): + logger.warning('Skills directory does not exist: %s', dir_path) + continue + + if not dir_path.is_dir(): + logger.warning('Skills path is not a directory: %s', dir_path) + continue + + # Find all SKILL.md files (recursive search) + for skill_file in dir_path.glob('**/SKILL.md'): + try: + skill_folder = skill_file.parent + content = skill_file.read_text(encoding='utf-8') + frontmatter, instructions = parse_skill_md(content) + + # Get required fields + name = frontmatter.get('name') + description = frontmatter.get('description', '') + + # Validation + if validate: + if not name: + logger.warning( + 'Skill at %s missing required "name" field, skipping', + skill_folder, + ) + continue + if not description: + logger.warning( + 'Skill "%s" at %s missing "description" field', + name, + skill_folder, + ) + + # Use folder name if name not provided + if not name: + name = skill_folder.name + + # Extract extra metadata fields + extra = {k: v for k, v in frontmatter.items() if k not in ('name', 'description')} + + # Create metadata + metadata = SkillMetadata( + name=name, + description=description, + extra=extra, + ) + + # Validate metadata (log warnings) + if validate: + validation_warnings = validate_skill_metadata(frontmatter, instructions) + for warning in validation_warnings: + logger.warning('Skill "%s" at %s: %s', name, skill_folder, warning) + + # Discover resources and scripts + resources = _discover_resources(skill_folder) + scripts = _discover_scripts(skill_folder, name) + + # Create skill + skill = Skill( + name=name, + path=skill_folder.resolve(), + metadata=metadata, + content=instructions, + resources=resources, + scripts=scripts, + ) + + skills.append(skill) + logger.debug('Discovered skill: %s at %s', name, skill_folder) + + except SkillValidationError as e: + logger.exception('Skill validation error in %s: %s', skill_file, e) + raise + except OSError as e: + logger.warning('Failed to load skill from %s: %s', skill_file, e) + continue + + logger.info('Discovered %d skills from %d directories', len(skills), len(directories)) + return skills diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_exceptions.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_exceptions.py new file mode 100644 index 0000000000..e301d5a984 --- /dev/null +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_exceptions.py @@ -0,0 +1,21 @@ +from __future__ import annotations + + +class SkillException(Exception): + """Base exception for skill-related errors.""" + + +class SkillNotFoundError(SkillException): + """Skill not found in any source.""" + + +class SkillValidationError(SkillException): + """Skill validation failed.""" + + +class SkillResourceLoadError(SkillException): + """Failed to load skill resources.""" + + +class SkillScriptExecutionError(SkillException): + """Skill script execution failed.""" diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py new file mode 100644 index 0000000000..36f1c26c4f --- /dev/null +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py @@ -0,0 +1,424 @@ +"""Skills toolset implementation. + +This module provides the main SkillsToolset class that integrates +skill discovery and management with Pydantic AI agents. 
+""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path +from typing import Any + +import anyio + +from ..._run_context import RunContext +from ..function import FunctionToolset +from ._discovery import discover_skills +from ._exceptions import ( + SkillNotFoundError, + SkillResourceLoadError, + SkillScriptExecutionError, +) +from ._types import Skill + +logger = logging.getLogger('pydantic_ai.skills') + + +def _is_safe_path(base_path: Path, target_path: Path) -> bool: + """Check if target_path is safely within base_path (no path traversal). + + Args: + base_path: The base directory path. + target_path: The target path to validate. + + Returns: + True if target_path is within base_path, False otherwise. + """ + try: + target_path.resolve().relative_to(base_path.resolve()) + return True + except ValueError: + return False + + +class SkillsToolset(FunctionToolset): + """Pydantic AI toolset for automatic skill discovery and integration. + + See [skills docs](../skills.md) for more information. + + This is the primary interface for integrating skills with Pydantic AI agents. + It implements the toolset protocol and automatically discovers, loads, and + registers skills from specified directories. + + Provides the following tools to agents: + - list_skills(): List all available skills + - load_skill(skill_name): Load a specific skill's instructions + - read_skill_resource(skill_name, resource_name): Read a skill resource file + - run_skill_script(skill_name, script_name, args): Execute a skill script + + Example: + ```python + from pydantic_ai import Agent, SkillsToolset + + skills_toolset = SkillsToolset(directories=["./skills"]) + + agent = Agent( + model='openai:gpt-4o', + instructions="You are a helpful assistant.", + toolsets=[skills_toolset] + ) + + @agent.system_prompt + def add_skills_prompt() -> str: + return skills_toolset.get_skills_system_prompt() + ``` + """ + + def __init__( + self, + directories: list[str | Path], + *, + auto_discover: bool = True, + validate: bool = True, + id: str | None = None, + script_timeout: int = 30, + python_executable: str | Path | None = None, + ) -> None: + """Initialize the skills toolset. + + Args: + directories: List of directory paths to search for skills. + auto_discover: Automatically discover and load skills on init. + validate: Validate skill structure and metadata on load. + id: Unique identifier for this toolset. + script_timeout: Timeout in seconds for script execution (default: 30). + python_executable: Path to Python executable for running scripts. + If None, uses sys.executable (default). + """ + super().__init__(id=id) + + self._directories = [Path(d) for d in directories] + self._validate = validate + self._script_timeout = script_timeout + self._python_executable = str(python_executable) if python_executable else sys.executable + self._skills: dict[str, Skill] = {} + + if auto_discover: + self._discover_skills() + + # Register tools + self._register_tools() + + def _discover_skills(self) -> None: + """Discover and load skills from configured directories.""" + skills = discover_skills( + directories=self._directories, + validate=self._validate, + ) + self._skills = {skill.name: skill for skill in skills} + + def _register_tools(self) -> None: # noqa: C901 + """Register skill management tools with the toolset. 
+ + This method registers all four skill management tools: + - list_skills: List available skills + - load_skill: Load skill instructions + - read_skill_resource: Read skill resources + - run_skill_script: Execute skill scripts + """ + + @self.tool + async def list_skills(_ctx: RunContext[Any]) -> str: # pyright: ignore[reportUnusedFunction] + """List all available skills with their descriptions. + + Only use this tool if the available skills are not in your system prompt. + + Returns: + Formatted list of available skills with names and descriptions. + """ + if not self._skills: + return 'No skills available.' + + lines = ['# Available Skills', ''] + + for name, skill in sorted(self._skills.items()): + lines.append(f'{name}: {skill.metadata.description}') + + return '\n'.join(lines) + + @self.tool + async def load_skill(ctx: RunContext[Any], skill_name: str) -> str: # noqa: D417 # pyright: ignore[reportUnusedFunction] + """Load full instructions for a skill. + + Always load the skill before using read_skill_resource + or run_skill_script to understand the skill's capabilities, available + resources, scripts, and their usage patterns. + + Args: + skill_name: Name of the skill to load. + + Returns: + Full skill instructions including available resources and scripts. + """ + _ = ctx # Required by Pydantic AI toolset protocol + if skill_name not in self._skills: + available = ', '.join(sorted(self._skills.keys())) or 'none' + return f"Error: Skill '{skill_name}' not found. Available skills: {available}" + + skill = self._skills[skill_name] + logger.info('Loading skill: %s', skill_name) + + lines = [ + f'# Skill: {skill.name}', + f'**Description:** {skill.metadata.description}', + f'**Path:** {skill.path}', + '', + ] + + # Add resource list if available + if skill.resources: + lines.append('**Available Resources:**') + for resource in skill.resources: + lines.append(f'- {resource.name}') + lines.append('') + + # Add scripts list if available + if skill.scripts: + lines.append('**Available Scripts:**') + for script in skill.scripts: + lines.append(f'- {script.name}') + lines.append('') + + lines.append('---') + lines.append('') + lines.append(skill.content) + + return '\n'.join(lines) + + @self.tool + async def read_skill_resource( # noqa: D417 # pyright: ignore[reportUnusedFunction] + ctx: RunContext[Any], + skill_name: str, + resource_name: str, + ) -> str: + """Read a resource file from a skill (e.g., FORMS.md, REFERENCE.md). + + Call load_skill first to see which resources are available. + + Args: + skill_name: Name of the skill. + resource_name: The resource filename (e.g., "FORMS.md"). + + Returns: + The resource file content. + """ + _ = ctx # Required by Pydantic AI toolset protocol + if skill_name not in self._skills: + return f"Error: Skill '{skill_name}' not found." + + skill = self._skills[skill_name] + + # Find the resource + resource = None + for r in skill.resources: + if r.name == resource_name: + resource = r + break + + if resource is None: + available = [r.name for r in skill.resources] + return ( + f"Error: Resource '{resource_name}' not found in skill '{skill_name}'. " + f'Available resources: {available}' + ) + + # Security check + if not _is_safe_path(skill.path, resource.path): + logger.warning('Path traversal attempt detected: %s in %s', resource_name, skill_name) + return 'Error: Resource path escapes skill directory.' 
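+
+            # Resource files are re-read from disk on every call; resource.content is not used as a cache here.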
+ + try: + content = resource.path.read_text(encoding='utf-8') + logger.info('Read resource: %s from skill %s', resource_name, skill_name) + return content + except OSError as e: + logger.error('Failed to read resource %s: %s', resource_name, e) + raise SkillResourceLoadError(f"Failed to read resource '{resource_name}': {e}") from e + + @self.tool + async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunction] + ctx: RunContext[Any], + skill_name: str, + script_name: str, + args: list[str] | None = None, + ) -> str: + """Execute a skill script with command-line arguments. + + Call load_skill first to understand the script's expected arguments, + usage patterns, and example invocations. Running scripts without + loading instructions first will likely fail. + + Args: + skill_name: Name of the skill. + script_name: The script name (without .py extension). + args: Optional list of command-line arguments (positional args, flags, values). + + Returns: + The script's output (stdout and stderr combined). + """ + _ = ctx # Required by Pydantic AI toolset protocol + if skill_name not in self._skills: + return f"Error: Skill '{skill_name}' not found." + + skill = self._skills[skill_name] + + # Find the script + script = None + for s in skill.scripts: + if s.name == script_name: + script = s + break + + if script is None: + available = [s.name for s in skill.scripts] + return ( + f"Error: Script '{script_name}' not found in skill '{skill_name}'. Available scripts: {available}" + ) + + # Security check + if not _is_safe_path(skill.path, script.path): + logger.warning('Path traversal attempt detected: %s in %s', script_name, skill_name) + return 'Error: Script path escapes skill directory.' + + # Build command + cmd = [self._python_executable, str(script.path)] + if args: + cmd.extend(args) + + logger.info('Running script: %s with args: %s', script_name, args) + + try: + # Use anyio.run_process for async-compatible execution + result = None + with anyio.move_on_after(self._script_timeout) as scope: + result = await anyio.run_process( + cmd, + check=False, # We handle return codes manually + cwd=str(skill.path), + ) + + # Check if timeout was reached + if scope.cancelled_caught: + logger.error('Script %s timed out after %d seconds', script_name, self._script_timeout) + raise SkillScriptExecutionError( + f"Script '{script_name}' timed out after {self._script_timeout} seconds" + ) + + # At this point, result should be set (timeout check passed) + assert result is not None + + # Decode output from bytes to string + output = result.stdout.decode('utf-8', errors='replace') + if result.stderr: + stderr = result.stderr.decode('utf-8', errors='replace') + output += f'\n\nStderr:\n{stderr}' + + if result.returncode != 0: + output += f'\n\nScript exited with code {result.returncode}' + + return output.strip() or '(no output)' + + except OSError as e: + logger.error('Failed to execute script %s: %s', script_name, e) + raise SkillScriptExecutionError(f"Failed to execute script '{script_name}': {e}") from e + + def get_skills_system_prompt(self) -> str: + """Get the combined system prompt from all loaded skills. + + This should be added to the agent's system prompt to provide + skill discovery and usage instructions. + + Following Anthropic's approach, this includes all skill metadata upfront + in the system prompt, enabling the agent to discover and select skills + without needing to call list_skills() first. 
+ + Returns: + Formatted system prompt containing: + - All skill metadata (name + description) + - Instructions for using skill tools + - Progressive disclosure guidance + """ + if not self._skills: + return '' + + lines = [ + '# Skills', + '', + 'You have access to skills that extend your capabilities. Skills are modular packages', + 'containing instructions, resources, and scripts for specialized tasks.', + '', + '## Available Skills', + '', + 'The following skills are available to you. Use them when relevant to the task:', + '', + ] + + # List all skills with descriptions + for name, skill in sorted(self._skills.items()): + lines.append(f'- **{name}**: {skill.metadata.description}') + + lines.extend( + [ + '', + '## How to Use Skills', + '', + '**Progressive disclosure**: Load skill information only when needed.', + '', + '1. **When a skill is relevant to the current task**: Use `load_skill(skill_name)` to read the full instructions.', + '2. **For additional documentation**: Use `read_skill_resource(skill_name, resource_name)` to read FORMS.md, REFERENCE.md, or other resources.', + '3. **To execute skill scripts**: Use `run_skill_script(skill_name, script_name, args)` with appropriate command-line arguments.', + '', + '**Best practices**:', + '- Select skills based on task relevance and descriptions listed above', + '- Use progressive disclosure: load only what you need, when you need it, starting with load_skill', + "- Follow the skill's documented usage patterns and examples", + '', + ] + ) + + return '\n'.join(lines) + + @property + def skills(self) -> dict[str, Skill]: + """Get the dictionary of loaded skills. + + Returns: + Dictionary mapping skill names to Skill objects. + """ + return self._skills + + def get_skill(self, name: str) -> Skill: + """Get a specific skill by name. + + Args: + name: The skill name. + + Returns: + The Skill object. + + Raises: + SkillNotFoundError: If the skill is not found. + """ + if name not in self._skills: + raise SkillNotFoundError(f"Skill '{name}' not found") + return self._skills[name] + + def refresh(self) -> None: + """Re-discover skills from configured directories. + + Call this method to reload skills after changes to the filesystem. + """ + logger.info('Refreshing skills from directories') + self._discover_skills() diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_types.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_types.py new file mode 100644 index 0000000000..b75dda6985 --- /dev/null +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_types.py @@ -0,0 +1,90 @@ +"""Type definitions for skills toolset. + +This module contains dataclass-based type definitions for skills, +their metadata, resources, and scripts. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + + +@dataclass +class SkillMetadata: + """Skill metadata from SKILL.md frontmatter. + + Only `name` and `description` are required. Other fields + (version, author, category, tags, etc.) can be added dynamically + based on frontmatter content. + + Attributes: + name: The skill identifier. + description: Brief description of what the skill does. + extra: Additional metadata fields from frontmatter. + """ + + name: str + description: str + extra: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class SkillResource: + """A resource file within a skill (e.g., FORMS.md, REFERENCE.md). + + Attributes: + name: Resource filename (e.g., "FORMS.md"). 
+ path: Absolute path to the resource file. + content: Loaded content (lazy-loaded, None until read). + """ + + name: str + path: Path + content: str | None = None + + +@dataclass +class SkillScript: + """An executable script within a skill. + + Script-based tools: Executable Python scripts in scripts/ directory + or directly in the skill directory. + Can be executed via SkillsToolset.run_skill_script() tool. + + Attributes: + name: Script name without .py extension. + path: Absolute path to the script file. + skill_name: Parent skill name. + """ + + name: str + path: Path + skill_name: str + + +@dataclass +class Skill: + """A loaded skill instance. + + Attributes: + name: Skill name (from metadata). + path: Absolute path to skill directory. + metadata: Parsed metadata from SKILL.md. + content: Main content from SKILL.md (without frontmatter). + resources: Optional resource files (FORMS.md, etc.). + scripts: Available scripts in the skill directory or scripts/ subdirectory. + """ + + name: str + path: Path + metadata: SkillMetadata + content: str + resources: list[SkillResource] = field(default_factory=list) + scripts: list[SkillScript] = field(default_factory=list) + + @property + def description(self) -> str: + """Get skill description from metadata.""" + return self.metadata.description diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml index 8d196a813b..9f2afa37bd 100644 --- a/pydantic_ai_slim/pyproject.toml +++ b/pydantic_ai_slim/pyproject.toml @@ -61,6 +61,7 @@ dependencies = [ "opentelemetry-api>=1.28.0", "typing-inspection>=0.4.0", "genai-prices>=0.0.40", + "pyyaml>=6.0", ] [tool.hatch.metadata.hooks.uv-dynamic-versioning.optional-dependencies] diff --git a/tests/test_skills.py b/tests/test_skills.py new file mode 100644 index 0000000000..223e85a0a4 --- /dev/null +++ b/tests/test_skills.py @@ -0,0 +1,900 @@ +"""Tests for skills toolset.""" + +from pathlib import Path + +import pytest +from inline_snapshot import snapshot + +from pydantic_ai.toolsets.skills import ( + Skill, + SkillMetadata, + SkillNotFoundError, + SkillResource, + SkillScript, + SkillsToolset, + SkillValidationError, + discover_skills, + parse_skill_md, + validate_skill_metadata, +) + +pytestmark = pytest.mark.anyio + + +# ==================== Fixtures ==================== + + +@pytest.fixture +def sample_skills_dir(tmp_path: Path) -> Path: + """Create a temporary directory with sample skills.""" + # Create skill 1 + skill1_dir = tmp_path / 'skill-one' + skill1_dir.mkdir() + (skill1_dir / 'SKILL.md').write_text("""--- +name: skill-one +description: First test skill for basic operations +--- + +# Skill One + +Use this skill for basic operations. + +## Instructions + +1. Do something simple +2. Return results +""") + + # Create skill 2 with resources + skill2_dir = tmp_path / 'skill-two' + skill2_dir.mkdir() + (skill2_dir / 'SKILL.md').write_text("""--- +name: skill-two +description: Second test skill with resources +--- + +# Skill Two + +Advanced skill with resources. + +See FORMS.md for details. +""") + (skill2_dir / 'FORMS.md').write_text('# Forms\n\nForm filling guide.') + (skill2_dir / 'REFERENCE.md').write_text('# API Reference\n\nDetailed reference.') + + # Create skill 3 with scripts + skill3_dir = tmp_path / 'skill-three' + skill3_dir.mkdir() + (skill3_dir / 'SKILL.md').write_text("""--- +name: skill-three +description: Third test skill with executable scripts +--- + +# Skill Three + +Skill with executable scripts. 
+""") + + scripts_dir = skill3_dir / 'scripts' + scripts_dir.mkdir() + (scripts_dir / 'hello.py').write_text("""#!/usr/bin/env python3 +import sys +print(f"Hello, {sys.argv[1] if len(sys.argv) > 1 else 'World'}!") +""") + (scripts_dir / 'echo.py').write_text("""#!/usr/bin/env python3 +import sys +print(' '.join(sys.argv[1:])) +""") + + return tmp_path + + +# ==================== Type Tests ==================== + + +def test_skill_metadata_creation() -> None: + """Test creating SkillMetadata with required fields.""" + metadata = SkillMetadata(name='test-skill', description='A test skill') + + assert metadata.name == 'test-skill' + assert metadata.description == 'A test skill' + assert metadata.extra == {} + + +def test_skill_metadata_with_extra_fields() -> None: + """Test SkillMetadata with additional fields.""" + metadata = SkillMetadata( + name='test-skill', description='A test skill', extra={'version': '1.0.0', 'author': 'Test Author'} + ) + + assert metadata.extra['version'] == '1.0.0' + assert metadata.extra['author'] == 'Test Author' + + +def test_skill_resource_creation() -> None: + """Test creating SkillResource.""" + resource = SkillResource(name='FORMS.md', path=Path('/tmp/skill/FORMS.md')) + + assert resource.name == 'FORMS.md' + assert resource.path == Path('/tmp/skill/FORMS.md') + assert resource.content is None + + +def test_skill_script_creation() -> None: + """Test creating SkillScript.""" + script = SkillScript(name='test_script', path=Path('/tmp/skill/scripts/test_script.py'), skill_name='test-skill') + + assert script.name == 'test_script' + assert script.path == Path('/tmp/skill/scripts/test_script.py') + assert script.skill_name == 'test-skill' + + +def test_skill_creation() -> None: + """Test creating a complete Skill.""" + metadata = SkillMetadata(name='test-skill', description='A test skill') + resource = SkillResource(name='FORMS.md', path=Path('/tmp/skill/FORMS.md')) + script = SkillScript(name='test_script', path=Path('/tmp/skill/scripts/test_script.py'), skill_name='test-skill') + + skill = Skill( + name='test-skill', + path=Path('/tmp/skill'), + metadata=metadata, + content='# Instructions\n\nTest instructions.', + resources=[resource], + scripts=[script], + ) + + assert skill.name == 'test-skill' + assert skill.path == Path('/tmp/skill') + assert skill.metadata.name == 'test-skill' + assert skill.content == '# Instructions\n\nTest instructions.' + assert len(skill.resources) == 1 + assert len(skill.scripts) == 1 + + +# ==================== Parsing Tests ==================== + + +def test_parse_skill_md_with_frontmatter() -> None: + """Test parsing SKILL.md with valid frontmatter.""" + content = """--- +name: test-skill +description: A test skill for testing +version: 1.0.0 +--- + +# Test Skill + +This is the main content. +""" + + frontmatter, instructions = parse_skill_md(content) + + assert frontmatter['name'] == 'test-skill' + assert frontmatter['description'] == 'A test skill for testing' + assert frontmatter['version'] == '1.0.0' + assert instructions.startswith('# Test Skill') + + +def test_parse_skill_md_without_frontmatter() -> None: + """Test parsing SKILL.md without frontmatter.""" + content = """# Test Skill + +This skill has no frontmatter. +""" + + frontmatter, instructions = parse_skill_md(content) + + assert frontmatter == {} + assert instructions.startswith('# Test Skill') + + +def test_parse_skill_md_empty_frontmatter() -> None: + """Test parsing SKILL.md with empty frontmatter.""" + content = """--- +--- + +# Test Skill + +Content here. 
+""" + + frontmatter, instructions = parse_skill_md(content) + + assert frontmatter == {} + assert instructions.startswith('# Test Skill') + + +def test_parse_skill_md_invalid_yaml() -> None: + """Test parsing SKILL.md with invalid YAML.""" + content = """--- +name: test-skill +description: [unclosed array +--- + +Content. +""" + + with pytest.raises(SkillValidationError, match='Failed to parse YAML frontmatter'): + parse_skill_md(content) + + +def test_parse_skill_md_multiline_description() -> None: + """Test parsing SKILL.md with multiline description.""" + content = """--- +name: test-skill +description: | + This is a multiline + description for testing +--- + +# Content +""" + + frontmatter, _ = parse_skill_md(content) + + assert 'multiline' in frontmatter['description'] + assert 'description for testing' in frontmatter['description'] + + +def test_parse_skill_md_complex_frontmatter() -> None: + """Test parsing SKILL.md with complex frontmatter.""" + content = """--- +name: complex-skill +description: Complex skill with metadata +version: 2.0.0 +author: Test Author +tags: + - testing + - example +metadata: + category: test + priority: high +--- + +# Complex Skill +""" + + frontmatter, _ = parse_skill_md(content) + + assert frontmatter['name'] == 'complex-skill' + assert frontmatter['tags'] == ['testing', 'example'] + assert frontmatter['metadata']['category'] == 'test' + + +# ==================== Validation Tests ==================== + + +def testvalidate_skill_metadata_valid() -> None: + """Test validation with valid metadata.""" + frontmatter = { + 'name': 'test-skill', + 'description': 'A valid test skill', + } + warnings = validate_skill_metadata(frontmatter, 'Content here.') + assert len(warnings) == 0 + + +def testvalidate_skill_metadata_name_too_long() -> None: + """Test validation with name exceeding 64 characters.""" + frontmatter = { + 'name': 'a' * 65, + 'description': 'Test', + } + warnings = validate_skill_metadata(frontmatter, 'Content') + + assert len(warnings) == 1 + assert '64 characters' in warnings[0] + + +def testvalidate_skill_metadata_invalid_name_format() -> None: + """Test validation with invalid name format.""" + frontmatter = { + 'name': 'Invalid_Name_With_Underscores', + 'description': 'Test', + } + warnings = validate_skill_metadata(frontmatter, 'Content') + + assert len(warnings) >= 1 + assert any('lowercase letters, numbers, and hyphens' in w for w in warnings) + + +def testvalidate_skill_metadata_reserved_word() -> None: + """Test validation with reserved words in name.""" + frontmatter = { + 'name': 'anthropic-helper', + 'description': 'Test', + } + warnings = validate_skill_metadata(frontmatter, 'Content') + + assert len(warnings) >= 1 + assert any('reserved word' in w for w in warnings) + + +def testvalidate_skill_metadata_description_too_long() -> None: + """Test validation with description exceeding 1024 characters.""" + frontmatter = { + 'name': 'test-skill', + 'description': 'x' * 1025, + } + warnings = validate_skill_metadata(frontmatter, 'Content') + + assert len(warnings) >= 1 + assert any('1024 characters' in w for w in warnings) + + +def testvalidate_skill_metadata_instructions_too_long() -> None: + """Test validation with instructions exceeding 500 lines.""" + frontmatter = { + 'name': 'test-skill', + 'description': 'Test', + } + # Create content with 501 lines + instructions = '\n'.join([f'Line {i}' for i in range(501)]) + + warnings = validate_skill_metadata(frontmatter, instructions) + + assert len(warnings) >= 1 + assert any('500 lines' 
in w for w in warnings) + + +def testvalidate_skill_metadata_multiple_issues() -> None: + """Test validation with multiple issues.""" + frontmatter = { + 'name': 'A' * 65, # Too long + 'description': 'x' * 1025, # Too long + } + instructions = '\n'.join([f'Line {i}' for i in range(501)]) # Too many lines + + warnings = validate_skill_metadata(frontmatter, instructions) + + # Should have warnings for name, description, and instructions + assert len(warnings) >= 3 + + +def testvalidate_skill_metadata_good_naming_conventions() -> None: + """Test validation with valid naming conventions.""" + good_names = [ + 'processing-pdfs', + 'analyzing-spreadsheets', + 'test-skill-123', + 'pdf-processing', + 'skill-1', + ] + + for name in good_names: + frontmatter = {'name': name, 'description': 'Test'} + warnings = validate_skill_metadata(frontmatter, 'Content') + assert len(warnings) == 0, f"Name '{name}' should be valid" + + +def testvalidate_skill_metadata_bad_naming_conventions() -> None: + """Test validation with invalid naming conventions.""" + bad_names = [ + 'Invalid_Name', # Underscores + 'InvalidName', # Capital letters + 'invalid name', # Spaces + 'invalid.name', # Periods + 'claude-tools', # Reserved word + ] + + for name in bad_names: + frontmatter = {'name': name, 'description': 'Test'} + warnings = validate_skill_metadata(frontmatter, 'Content') + assert len(warnings) > 0, f"Name '{name}' should trigger warnings" + + +# ==================== Discovery Tests ==================== + + +def test_discover_skills_single_skill(tmp_path: Path) -> None: + """Test discovering a single skill.""" + skill_dir = tmp_path / 'test-skill' + skill_dir.mkdir() + + skill_md = skill_dir / 'SKILL.md' + skill_md.write_text("""--- +name: test-skill +description: A test skill +--- + +# Test Skill + +Instructions here. +""") + + skills = discover_skills([tmp_path], validate=True) + + assert len(skills) == 1 + assert skills[0].name == 'test-skill' + assert skills[0].description == 'A test skill' + assert 'Instructions here' in skills[0].content + + +def test_discover_skills_multiple_skills(tmp_path: Path) -> None: + """Test discovering multiple skills.""" + # Create first skill + skill1_dir = tmp_path / 'skill-one' + skill1_dir.mkdir() + (skill1_dir / 'SKILL.md').write_text("""--- +name: skill-one +description: First skill +--- + +Content 1. +""") + + # Create second skill + skill2_dir = tmp_path / 'skill-two' + skill2_dir.mkdir() + (skill2_dir / 'SKILL.md').write_text("""--- +name: skill-two +description: Second skill +--- + +Content 2. +""") + + skills = discover_skills([tmp_path], validate=True) + + assert len(skills) == 2 + skill_names = {s.name for s in skills} + assert skill_names == {'skill-one', 'skill-two'} + + +def test_discover_skills_with_resources(tmp_path: Path) -> None: + """Test discovering skills with resource files.""" + skill_dir = tmp_path / 'test-skill' + skill_dir.mkdir() + + (skill_dir / 'SKILL.md').write_text("""--- +name: test-skill +description: Skill with resources +--- + +See FORMS.md for details. 
+""") + + (skill_dir / 'FORMS.md').write_text('# Forms\n\nForm documentation.') + (skill_dir / 'REFERENCE.md').write_text('# Reference\n\nAPI reference.') + + skills = discover_skills([tmp_path], validate=True) + + assert len(skills) == 1 + assert len(skills[0].resources) == 2 + resource_names = {r.name for r in skills[0].resources} + assert resource_names == {'FORMS.md', 'REFERENCE.md'} + + +def test_discover_skills_with_scripts(tmp_path: Path) -> None: + """Test discovering skills with scripts.""" + skill_dir = tmp_path / 'test-skill' + skill_dir.mkdir() + + (skill_dir / 'SKILL.md').write_text("""--- +name: test-skill +description: Skill with scripts +--- + +Use the search script. +""") + + scripts_dir = skill_dir / 'scripts' + scripts_dir.mkdir() + (scripts_dir / 'search.py').write_text('#!/usr/bin/env python3\nprint("searching")') + (scripts_dir / 'process.py').write_text('#!/usr/bin/env python3\nprint("processing")') + + skills = discover_skills([tmp_path], validate=True) + + assert len(skills) == 1 + assert len(skills[0].scripts) == 2 + script_names = {s.name for s in skills[0].scripts} + assert script_names == {'search', 'process'} + + +def test_discover_skills_nested_directories(tmp_path: Path) -> None: + """Test discovering skills in nested directories.""" + nested_dir = tmp_path / 'category' / 'subcategory' / 'test-skill' + nested_dir.mkdir(parents=True) + + (nested_dir / 'SKILL.md').write_text("""--- +name: nested-skill +description: Nested skill +--- + +Content. +""") + + skills = discover_skills([tmp_path], validate=True) + + assert len(skills) == 1 + assert skills[0].name == 'nested-skill' + + +def test_discover_skills_missing_name_with_validation(tmp_path: Path) -> None: + """Test discovering skill missing name field with validation enabled.""" + skill_dir = tmp_path / 'test-skill' + skill_dir.mkdir() + + (skill_dir / 'SKILL.md').write_text("""--- +description: Missing name field +--- + +Content. +""") + + # With validation, should skip this skill (log warning) + skills = discover_skills([tmp_path], validate=True) + assert len(skills) == 0 + + +def test_discover_skills_missing_name_without_validation(tmp_path: Path) -> None: + """Test discovering skill missing name field without validation.""" + skill_dir = tmp_path / 'test-skill' + skill_dir.mkdir() + + (skill_dir / 'SKILL.md').write_text("""--- +description: Missing name field +--- + +Content. +""") + + # Without validation, uses folder name + skills = discover_skills([tmp_path], validate=False) + assert len(skills) == 1 + assert skills[0].name == 'test-skill' # Uses folder name + + +def test_discover_skills_nonexistent_directory(tmp_path: Path) -> None: + """Test discovering skills from non-existent directory.""" + nonexistent = tmp_path / 'does-not-exist' + + # Should not raise, just log warning + skills = discover_skills([nonexistent], validate=True) + assert len(skills) == 0 + + +def test_discover_skills_resources_subdirectory(tmp_path: Path) -> None: + """Test discovering resources in resources/ subdirectory.""" + skill_dir = tmp_path / 'test-skill' + skill_dir.mkdir() + + (skill_dir / 'SKILL.md').write_text("""--- +name: test-skill +description: Skill with resources subdirectory +--- + +Content. 
+""") + + resources_dir = skill_dir / 'resources' + resources_dir.mkdir() + (resources_dir / 'schema.json').write_text('{}') + (resources_dir / 'template.txt').write_text('template') + + nested_dir = resources_dir / 'nested' + nested_dir.mkdir() + (nested_dir / 'data.csv').write_text('col1,col2') + + skills = discover_skills([tmp_path], validate=True) + + assert len(skills) == 1 + assert len(skills[0].resources) == 3 + + resource_names = {r.name for r in skills[0].resources} + assert 'resources/schema.json' in resource_names + assert 'resources/template.txt' in resource_names + assert 'resources/nested/data.csv' in resource_names + + +# ==================== SkillsToolset Tests ==================== + + +def test_toolset_initialization(sample_skills_dir: Path) -> None: + """Test SkillsToolset initialization.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + assert len(toolset.skills) == 3 + assert 'skill-one' in toolset.skills + assert 'skill-two' in toolset.skills + assert 'skill-three' in toolset.skills + + +def test_toolset_tool_definitions(sample_skills_dir: Path) -> None: + """Test SkillsToolset tool definitions with snapshot.""" + from pydantic_ai._run_context import RunContext + from pydantic_ai._tool_manager import ToolManager + from pydantic_ai.models.test import TestModel + from pydantic_ai.tools import ToolDefinition + from pydantic_ai.usage import RunUsage + + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Build a run context to get tool definitions via ToolManager + context = RunContext( + deps=None, + model=TestModel(), + usage=RunUsage(), + prompt=None, + messages=[], + run_step=0, + ) + + # Get tool manager and prepare for run step + import asyncio + + async def get_tool_defs(): + tool_manager = await ToolManager(toolset).for_run_step(context) + return tool_manager.tool_defs + + tool_defs = asyncio.run(get_tool_defs()) + + # Verify tool definitions match expected structure + assert tool_defs == snapshot( + [ + ToolDefinition( + name='list_skills', + description="""\ +List all available skills with their descriptions. + +Only use this tool if the available skills are not in your system prompt. + +Formatted list of available skills with names and descriptions. +\ +""", + parameters_json_schema={ + 'additionalProperties': False, + 'properties': {}, + 'type': 'object', + }, + ), + ToolDefinition( + name='load_skill', + description="""\ +Load full instructions for a skill. + +Always load the skill before using read_skill_resource +or run_skill_script to understand the skill's capabilities, available +resources, scripts, and their usage patterns. + +Full skill instructions including available resources and scripts. +\ +""", + parameters_json_schema={ + 'additionalProperties': False, + 'properties': {'skill_name': {'description': 'Name of the skill to load.', 'type': 'string'}}, + 'required': ['skill_name'], + 'type': 'object', + }, + ), + ToolDefinition( + name='read_skill_resource', + description="""\ +Read a resource file from a skill (e.g., FORMS.md, REFERENCE.md). + +Call load_skill first to see which resources are available. + +The resource file content. 
+\ +""", + parameters_json_schema={ + 'additionalProperties': False, + 'properties': { + 'resource_name': { + 'description': 'The resource filename (e.g., "FORMS.md").', + 'type': 'string', + }, + 'skill_name': {'description': 'Name of the skill.', 'type': 'string'}, + }, + 'required': ['skill_name', 'resource_name'], + 'type': 'object', + }, + ), + ToolDefinition( + name='run_skill_script', + description="""\ +Execute a skill script with command-line arguments. + +Call load_skill first to understand the script's expected arguments, +usage patterns, and example invocations. Running scripts without +loading instructions first will likely fail. + +The script's output (stdout and stderr combined). +\ +""", + parameters_json_schema={ + 'additionalProperties': False, + 'properties': { + 'args': { + 'anyOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}], + 'default': None, + 'description': 'Optional list of command-line arguments (positional args, flags, values).', + }, + 'script_name': { + 'description': 'The script name (without .py extension).', + 'type': 'string', + }, + 'skill_name': {'description': 'Name of the skill.', 'type': 'string'}, + }, + 'required': ['skill_name', 'script_name'], + 'type': 'object', + }, + ), + ] + ) + + +def test_toolset_get_skill(sample_skills_dir: Path) -> None: + """Test getting a specific skill.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + skill = toolset.get_skill('skill-one') + assert skill.name == 'skill-one' + assert skill.metadata.description == 'First test skill for basic operations' + + +def test_toolset_get_skill_not_found(sample_skills_dir: Path) -> None: + """Test getting a non-existent skill.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + with pytest.raises(SkillNotFoundError, match="Skill 'nonexistent' not found"): + toolset.get_skill('nonexistent') + + +async def test_list_skills_tool(sample_skills_dir: Path) -> None: + """Test the list_skills tool by checking skills were loaded.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Verify all three skills were discovered + assert len(toolset.skills) == 3 + assert 'skill-one' in toolset.skills + assert 'skill-two' in toolset.skills + assert 'skill-three' in toolset.skills + + # Verify descriptions + assert toolset.skills['skill-one'].metadata.description == 'First test skill for basic operations' + assert toolset.skills['skill-two'].metadata.description == 'Second test skill with resources' + assert toolset.skills['skill-three'].metadata.description == 'Third test skill with executable scripts' + + +async def test_load_skill_tool(sample_skills_dir: Path) -> None: + """Test the load_skill tool.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # The tools are internal, so we test via the public methods + # We can check that the skills were loaded correctly + skill = toolset.get_skill('skill-one') + assert skill is not None + assert skill.name == 'skill-one' + assert 'First test skill for basic operations' in skill.metadata.description + assert 'Use this skill for basic operations' in skill.content + + +async def test_load_skill_not_found(sample_skills_dir: Path) -> None: + """Test loading a non-existent skill.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Test that nonexistent skill raises an error + with pytest.raises(SkillNotFoundError): + toolset.get_skill('nonexistent-skill') + + +async def test_read_skill_resource_tool(sample_skills_dir: Path) -> None: + """Test the 
read_skill_resource tool.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Test that skill-two has the expected resources + skill = toolset.get_skill('skill-two') + assert len(skill.resources) == 2 + + resource_names = [r.name for r in skill.resources] + assert 'FORMS.md' in resource_names + assert 'REFERENCE.md' in resource_names + + # Check that resources can be read + for resource in skill.resources: + assert resource.path.exists() + assert resource.path.is_file() + + +async def test_read_skill_resource_not_found(sample_skills_dir: Path) -> None: + """Test reading a non-existent resource.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Test skill with no resources + skill_one = toolset.get_skill('skill-one') + assert len(skill_one.resources) == 0 + + # Test skill with resources + skill_two = toolset.get_skill('skill-two') + resource_names = [r.name for r in skill_two.resources] + assert 'NONEXISTENT.md' not in resource_names + + +async def test_run_skill_script_tool(sample_skills_dir: Path) -> None: + """Test the run_skill_script tool.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Test that skill-three has scripts + skill = toolset.get_skill('skill-three') + assert len(skill.scripts) == 2 + + script_names = [s.name for s in skill.scripts] + assert 'hello' in script_names + assert 'echo' in script_names + + # Check that scripts can be found + for script in skill.scripts: + assert script.path.exists() + assert script.path.is_file() + assert script.path.suffix == '.py' + + +async def test_run_skill_script_not_found(sample_skills_dir: Path) -> None: + """Test running a non-existent script.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Test skill with no scripts + skill_one = toolset.get_skill('skill-one') + assert len(skill_one.scripts) == 0 + + # Test skill with scripts + skill_three = toolset.get_skill('skill-three') + script_names = [s.name for s in skill_three.scripts] + assert 'nonexistent' not in script_names + + +def test_get_skills_system_prompt(sample_skills_dir: Path) -> None: + """Test generating the system prompt.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + prompt = toolset.get_skills_system_prompt() + + # Should include all skill names and descriptions + assert 'skill-one' in prompt + assert 'skill-two' in prompt + assert 'skill-three' in prompt + assert 'First test skill for basic operations' in prompt + assert 'Second test skill with resources' in prompt + assert 'Third test skill with executable scripts' in prompt + + # Should include usage instructions + assert 'load_skill' in prompt + assert 'read_skill_resource' in prompt + assert 'run_skill_script' in prompt + + # Should include progressive disclosure guidance + assert 'Progressive disclosure' in prompt or 'progressive disclosure' in prompt + + +def test_get_skills_system_prompt_empty() -> None: + """Test system prompt with no skills.""" + toolset = SkillsToolset(directories=[], auto_discover=False) + + prompt = toolset.get_skills_system_prompt() + assert prompt == '' + + +def test_toolset_refresh(sample_skills_dir: Path) -> None: + """Test refreshing skills.""" + toolset = SkillsToolset(directories=[sample_skills_dir]) + + initial_count = len(toolset.skills) + + # Add a new skill + new_skill_dir = sample_skills_dir / 'skill-four' + new_skill_dir.mkdir() + (new_skill_dir / 'SKILL.md').write_text("""--- +name: skill-four +description: Fourth skill added after initialization +--- + +New skill content. 
+""") + + # Refresh + toolset.refresh() + + assert len(toolset.skills) == initial_count + 1 + assert 'skill-four' in toolset.skills diff --git a/uv.lock b/uv.lock index 75bc6b08fb..71a03a3203 100644 --- a/uv.lock +++ b/uv.lock @@ -5558,6 +5558,7 @@ dependencies = [ { name = "opentelemetry-api" }, { name = "pydantic" }, { name = "pydantic-graph" }, + { name = "pyyaml" }, { name = "typing-inspection" }, ] @@ -5709,6 +5710,7 @@ requires-dist = [ { name = "pydantic-evals", marker = "extra == 'evals'", editable = "pydantic_evals" }, { name = "pydantic-graph", editable = "pydantic_graph" }, { name = "pyperclip", marker = "extra == 'cli'", specifier = ">=1.9.0" }, + { name = "pyyaml", specifier = ">=6.0" }, { name = "requests", marker = "extra == 'vertexai'", specifier = ">=2.32.2" }, { name = "rich", marker = "extra == 'cli'", specifier = ">=13" }, { name = "starlette", marker = "extra == 'ag-ui'", specifier = ">=0.45.3" }, From 3e69b2de1246965efa48517a9f38723750c78f43 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:02:26 -0300 Subject: [PATCH 02/17] remove logging implementation --- .../pydantic_ai/toolsets/skills/_discovery.py | 29 +++---------------- .../pydantic_ai/toolsets/skills/_toolset.py | 13 --------- 2 files changed, 4 insertions(+), 38 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py index 5c4919ac07..2691020f26 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py @@ -6,7 +6,6 @@ from __future__ import annotations -import logging import re from collections.abc import Sequence from pathlib import Path @@ -22,8 +21,6 @@ SkillScript, ) -logger = logging.getLogger('pydantic_ai.skills') - # Anthropic's naming convention: lowercase letters, numbers, and hyphens only SKILL_NAME_PATTERN = re.compile(r'^[a-z0-9-]+$') RESERVED_WORDS = {'anthropic', 'claude'} @@ -226,11 +223,9 @@ def discover_skills( dir_path = Path(skill_dir).expanduser().resolve() if not dir_path.exists(): - logger.warning('Skills directory does not exist: %s', dir_path) continue if not dir_path.is_dir(): - logger.warning('Skills path is not a directory: %s', dir_path) continue # Find all SKILL.md files (recursive search) @@ -247,17 +242,7 @@ def discover_skills( # Validation if validate: if not name: - logger.warning( - 'Skill at %s missing required "name" field, skipping', - skill_folder, - ) continue - if not description: - logger.warning( - 'Skill "%s" at %s missing "description" field', - name, - skill_folder, - ) # Use folder name if name not provided if not name: @@ -273,11 +258,9 @@ def discover_skills( extra=extra, ) - # Validate metadata (log warnings) + # Validate metadata if validate: - validation_warnings = validate_skill_metadata(frontmatter, instructions) - for warning in validation_warnings: - logger.warning('Skill "%s" at %s: %s', name, skill_folder, warning) + validate_skill_metadata(frontmatter, instructions) # Discover resources and scripts resources = _discover_resources(skill_folder) @@ -294,14 +277,10 @@ def discover_skills( ) skills.append(skill) - logger.debug('Discovered skill: %s at %s', name, skill_folder) - except SkillValidationError as e: - logger.exception('Skill validation error in %s: %s', skill_file, e) + except SkillValidationError: raise - except OSError as e: - logger.warning('Failed to load skill from %s: %s', skill_file, e) + except OSError: continue - logger.info('Discovered %d 
skills from %d directories', len(skills), len(directories)) return skills diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py index 36f1c26c4f..d53d304849 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py @@ -6,7 +6,6 @@ from __future__ import annotations -import logging import sys from pathlib import Path from typing import Any @@ -23,8 +22,6 @@ ) from ._types import Skill -logger = logging.getLogger('pydantic_ai.skills') - def _is_safe_path(base_path: Path, target_path: Path) -> bool: """Check if target_path is safely within base_path (no path traversal). @@ -168,7 +165,6 @@ async def load_skill(ctx: RunContext[Any], skill_name: str) -> str: # noqa: D41 return f"Error: Skill '{skill_name}' not found. Available skills: {available}" skill = self._skills[skill_name] - logger.info('Loading skill: %s', skill_name) lines = [ f'# Skill: {skill.name}', @@ -236,15 +232,12 @@ async def read_skill_resource( # noqa: D417 # pyright: ignore[reportUnusedFunc # Security check if not _is_safe_path(skill.path, resource.path): - logger.warning('Path traversal attempt detected: %s in %s', resource_name, skill_name) return 'Error: Resource path escapes skill directory.' try: content = resource.path.read_text(encoding='utf-8') - logger.info('Read resource: %s from skill %s', resource_name, skill_name) return content except OSError as e: - logger.error('Failed to read resource %s: %s', resource_name, e) raise SkillResourceLoadError(f"Failed to read resource '{resource_name}': {e}") from e @self.tool @@ -289,7 +282,6 @@ async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunctio # Security check if not _is_safe_path(skill.path, script.path): - logger.warning('Path traversal attempt detected: %s in %s', script_name, skill_name) return 'Error: Script path escapes skill directory.' # Build command @@ -297,8 +289,6 @@ async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunctio if args: cmd.extend(args) - logger.info('Running script: %s with args: %s', script_name, args) - try: # Use anyio.run_process for async-compatible execution result = None @@ -311,7 +301,6 @@ async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunctio # Check if timeout was reached if scope.cancelled_caught: - logger.error('Script %s timed out after %d seconds', script_name, self._script_timeout) raise SkillScriptExecutionError( f"Script '{script_name}' timed out after {self._script_timeout} seconds" ) @@ -331,7 +320,6 @@ async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunctio return output.strip() or '(no output)' except OSError as e: - logger.error('Failed to execute script %s: %s', script_name, e) raise SkillScriptExecutionError(f"Failed to execute script '{script_name}': {e}") from e def get_skills_system_prompt(self) -> str: @@ -420,5 +408,4 @@ def refresh(self) -> None: Call this method to reload skills after changes to the filesystem. 
""" - logger.info('Refreshing skills from directories') self._discover_skills() From 998a3247038b54c705cfb20568ba261b9c2e5df1 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:29:20 -0300 Subject: [PATCH 03/17] Update tests/test_skills.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index 223e85a0a4..181f1c5e57 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -265,7 +265,7 @@ def test_parse_skill_md_complex_frontmatter() -> None: # ==================== Validation Tests ==================== -def testvalidate_skill_metadata_valid() -> None: +def test_validate_skill_metadata_valid() -> None: """Test validation with valid metadata.""" frontmatter = { 'name': 'test-skill', From 9924c19d2034a1ec7609b5b4111a9d05219edaae Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:29:34 -0300 Subject: [PATCH 04/17] Update tests/test_skills.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index 181f1c5e57..2228710d97 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -299,7 +299,7 @@ def testvalidate_skill_metadata_invalid_name_format() -> None: assert any('lowercase letters, numbers, and hyphens' in w for w in warnings) -def testvalidate_skill_metadata_reserved_word() -> None: +def test_validate_skill_metadata_reserved_word() -> None: """Test validation with reserved words in name.""" frontmatter = { 'name': 'anthropic-helper', From 140847bfba018251f7a52c694c73eba343030dde Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:29:57 -0300 Subject: [PATCH 05/17] Update tests/test_skills.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index 2228710d97..1ff336c051 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -275,7 +275,7 @@ def test_validate_skill_metadata_valid() -> None: assert len(warnings) == 0 -def testvalidate_skill_metadata_name_too_long() -> None: +def test_validate_skill_metadata_name_too_long() -> None: """Test validation with name exceeding 64 characters.""" frontmatter = { 'name': 'a' * 65, From 079a434a076fad5c7727ce45faf971e623b1eebd Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:30:11 -0300 Subject: [PATCH 06/17] Update tests/test_skills.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index 1ff336c051..e5845d2658 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -352,7 +352,7 @@ def testvalidate_skill_metadata_multiple_issues() -> None: assert len(warnings) >= 3 -def testvalidate_skill_metadata_good_naming_conventions() -> None: +def test_validate_skill_metadata_good_naming_conventions() -> None: """Test validation with valid naming conventions.""" good_names = [ 'processing-pdfs', From 611526d5c8f8bb4ebe22b043b82a53e36bd6d090 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:30:22 -0300 Subject: [PATCH 07/17] Update tests/test_skills.py Co-authored-by: Copilot 
<175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index e5845d2658..88a50bf2f3 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -368,7 +368,7 @@ def test_validate_skill_metadata_good_naming_conventions() -> None: assert len(warnings) == 0, f"Name '{name}' should be valid" -def testvalidate_skill_metadata_bad_naming_conventions() -> None: +def test_validate_skill_metadata_bad_naming_conventions() -> None: """Test validation with invalid naming conventions.""" bad_names = [ 'Invalid_Name', # Underscores From 2ac5779e57a9cde7e4e22e44a5199650ac5f8a77 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:30:37 -0300 Subject: [PATCH 08/17] Update tests/test_skills.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index 88a50bf2f3..95a6898e92 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -287,7 +287,7 @@ def test_validate_skill_metadata_name_too_long() -> None: assert '64 characters' in warnings[0] -def testvalidate_skill_metadata_invalid_name_format() -> None: +def test_validate_skill_metadata_invalid_name_format() -> None: """Test validation with invalid name format.""" frontmatter = { 'name': 'Invalid_Name_With_Underscores', From f02cac39d8bda30f5fccadf28147aa20341fb84a Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:30:47 -0300 Subject: [PATCH 09/17] Update tests/test_skills.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_skills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_skills.py b/tests/test_skills.py index 95a6898e92..05110a7326 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -338,7 +338,7 @@ def testvalidate_skill_metadata_instructions_too_long() -> None: assert any('500 lines' in w for w in warnings) -def testvalidate_skill_metadata_multiple_issues() -> None: +def test_validate_skill_metadata_multiple_issues() -> None: """Test validation with multiple issues.""" frontmatter = { 'name': 'A' * 65, # Too long From 028cfb4fdaae4e8551d383a9ae561bc45b98e1ab Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Fri, 19 Dec 2025 19:32:04 -0300 Subject: [PATCH 10/17] update skills docs per copilot suggestions --- docs/skills.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/skills.md b/docs/skills.md index 837b0a1150..0eb2236302 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -77,7 +77,7 @@ my-skill/ The `SKILL.md` file uses **YAML frontmatter** for metadata and **Markdown** for instructions: -```markdown +````markdown --- name: arxiv-search description: Search arXiv for research papers @@ -105,17 +105,15 @@ To search arXiv, use the `run_skill_script` tool with: 3. 
**args**: Your search query and options ## Example -``` +```python run_skill_script( -skill_name="arxiv-search", -script_name="arxiv_search", -args=["machine learning", "--max-papers", "5"] + skill_name="arxiv-search", + script_name="arxiv_search", + args=["machine learning", "--max-papers", "5"] ) - -``` - ``` +```` ### Required Fields @@ -137,7 +135,7 @@ Following Anthropic's skill naming conventions: The toolset implements **progressive disclosure** - exposing information only when needed: -``` +```markdown ┌─────────────────────────────────────────────────────────────┐ │ System Prompt (via get_skills_system_prompt()) │ │ ┌───────────────────────────────────────────────────────┐ │ @@ -236,7 +234,7 @@ Place scripts in either: - `scripts/` subdirectory (recommended) - Directly in the skill folder -``` +```markdown my-skill/ ├── SKILL.md └── scripts/ @@ -291,7 +289,7 @@ toolset = SkillsToolset( directories=["./skills", "./shared-skills"], auto_discover=True, # Auto-discover skills on init (default: True) validate=True, # Validate skill structure (default: True) - toolset_id="skills", # Unique identifier (default: "skills") + id="skills", # Unique identifier (default: "skills") script_timeout=30, # Script execution timeout in seconds (default: 30) python_executable=None, # Python executable path (default: sys.executable) ) @@ -403,7 +401,7 @@ Here's a complete example with a skill that searches for research papers: ### Skill Structure -``` +```markdown skills/ └── arxiv-search/ ├── SKILL.md @@ -413,7 +411,7 @@ skills/ ### SKILL.md -```markdown +````markdown --- name: arxiv-search description: Search arXiv for research papers by query @@ -433,13 +431,15 @@ Use `run_skill_script` with: ## Example To find papers about transformers: -``` - -run_skill_script("arxiv-search", "arxiv_search", ["transformers attention mechanism", "--max-papers", "3"]) - -``` +```python +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["transformers attention mechanism", "--max-papers", "3"] +) ``` +```` ### arxiv_search.py From 5ea1d01c9e05b720e3840d649ae47e74199af1ab Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 09:09:30 -0300 Subject: [PATCH 11/17] Update docs/skills.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/skills.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/skills.md b/docs/skills.md index 0eb2236302..ab670afea0 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -393,7 +393,7 @@ The toolset includes security measures: - **Path traversal prevention**: Resources and scripts are validated to stay within the skill directory - **Script timeout**: Scripts have a configurable timeout (default: 30 seconds) -- **Sandboxed execution**: Scripts run in a subprocess with limited access +- **Subprocess execution**: Scripts run in a separate process, but with the same OS-level permissions as your agent process (this is not a security sandbox) ## Complete Example From bde5cab8f34d77e4d2848ab1c370cbf0dcb30123 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 09:34:19 -0300 Subject: [PATCH 12/17] add max_depth --- .../pydantic_ai/toolsets/skills/_discovery.py | 37 ++++++++++++++++++- .../pydantic_ai/toolsets/skills/_toolset.py | 8 +++- tests/test_skills.py | 4 +- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py index 2691020f26..dbbb87ce9b 100644 --- 
a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py @@ -155,6 +155,35 @@ def _discover_resources(skill_folder: Path) -> list[SkillResource]: return resources +def _find_skill_files(root_dir: Path, max_depth: int | None) -> list[Path]: + """Find SKILL.md files with depth-limited search using optimized glob patterns. + + Args: + root_dir: Root directory to search from. + max_depth: Maximum depth to search. None for unlimited. + + Returns: + List of paths to SKILL.md files. + """ + if max_depth is None: + # Unlimited recursive search + return list(root_dir.glob('**/SKILL.md')) + + # Build explicit glob patterns for each depth level + # This is much faster than iterdir() while still limiting depth + skill_files: list[Path] = [] + + for depth in range(max_depth + 1): + if depth == 0: + pattern = 'SKILL.md' + else: + pattern = '/'.join(['*'] * depth) + '/SKILL.md' + + skill_files.extend(root_dir.glob(pattern)) + + return skill_files + + def _discover_scripts(skill_folder: Path, skill_name: str) -> list[SkillScript]: """Discover executable scripts in a skill folder. @@ -201,6 +230,7 @@ def _discover_scripts(skill_folder: Path, skill_name: str) -> list[SkillScript]: def discover_skills( directories: Sequence[str | Path], validate: bool = True, + max_depth: int | None = 3, ) -> list[Skill]: """Discover skills from filesystem directories. @@ -210,6 +240,8 @@ def discover_skills( Args: directories: List of directory paths to search for skills. validate: Whether to validate skill structure (requires name and description). + max_depth: Maximum depth to search for SKILL.md files. None for unlimited. + Default is 3 levels deep to prevent performance issues with large trees. Returns: List of discovered Skill objects. @@ -228,8 +260,9 @@ def discover_skills( if not dir_path.is_dir(): continue - # Find all SKILL.md files (recursive search) - for skill_file in dir_path.glob('**/SKILL.md'): + # Find all SKILL.md files (depth-limited search for performance) + skill_files = _find_skill_files(dir_path, max_depth) + for skill_file in skill_files: try: skill_folder = skill_file.parent content = skill_file.read_text(encoding='utf-8') diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py index d53d304849..3fbfd7a2ee 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py @@ -82,6 +82,7 @@ def __init__( id: str | None = None, script_timeout: int = 30, python_executable: str | Path | None = None, + max_depth: int | None = 3, ) -> None: """Initialize the skills toolset. @@ -93,11 +94,14 @@ def __init__( script_timeout: Timeout in seconds for script execution (default: 30). python_executable: Path to Python executable for running scripts. If None, uses sys.executable (default). + max_depth: Maximum depth to search for SKILL.md files. None for unlimited. + Default is 3 levels deep to prevent performance issues with large trees. 
""" super().__init__(id=id) self._directories = [Path(d) for d in directories] self._validate = validate + self._max_depth = max_depth self._script_timeout = script_timeout self._python_executable = str(python_executable) if python_executable else sys.executable self._skills: dict[str, Skill] = {} @@ -113,6 +117,7 @@ def _discover_skills(self) -> None: skills = discover_skills( directories=self._directories, validate=self._validate, + max_depth=self._max_depth, ) self._skills = {skill.name: skill for skill in skills} @@ -146,7 +151,7 @@ async def list_skills(_ctx: RunContext[Any]) -> str: # pyright: ignore[reportUn return '\n'.join(lines) @self.tool - async def load_skill(ctx: RunContext[Any], skill_name: str) -> str: # noqa: D417 # pyright: ignore[reportUnusedFunction] + async def load_skill(ctx: RunContext[Any], skill_name: str) -> str: # pyright: ignore[reportUnusedFunction] """Load full instructions for a skill. Always load the skill before using read_skill_resource @@ -154,6 +159,7 @@ async def load_skill(ctx: RunContext[Any], skill_name: str) -> str: # noqa: D41 resources, scripts, and their usage patterns. Args: + ctx: Run context (required by toolset protocol). skill_name: Name of the skill to load. Returns: diff --git a/tests/test_skills.py b/tests/test_skills.py index 05110a7326..fabd0d3a61 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -311,7 +311,7 @@ def test_validate_skill_metadata_reserved_word() -> None: assert any('reserved word' in w for w in warnings) -def testvalidate_skill_metadata_description_too_long() -> None: +def test_validate_skill_metadata_description_too_long() -> None: """Test validation with description exceeding 1024 characters.""" frontmatter = { 'name': 'test-skill', @@ -323,7 +323,7 @@ def testvalidate_skill_metadata_description_too_long() -> None: assert any('1024 characters' in w for w in warnings) -def testvalidate_skill_metadata_instructions_too_long() -> None: +def test_validate_skill_metadata_instructions_too_long() -> None: """Test validation with instructions exceeding 500 lines.""" frontmatter = { 'name': 'test-skill', From 852df1cf0776056ca4b980346d359417cddeedd5 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 09:36:03 -0300 Subject: [PATCH 13/17] add max_depth --- pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py index 3fbfd7a2ee..abdb0b5c86 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py @@ -17,7 +17,6 @@ from ._discovery import discover_skills from ._exceptions import ( SkillNotFoundError, - SkillResourceLoadError, SkillScriptExecutionError, ) from ._types import Skill @@ -244,7 +243,7 @@ async def read_skill_resource( # noqa: D417 # pyright: ignore[reportUnusedFunc content = resource.path.read_text(encoding='utf-8') return content except OSError as e: - raise SkillResourceLoadError(f"Failed to read resource '{resource_name}': {e}") from e + return f"Error: Failed to read resource '{resource_name}': {e}" @self.tool async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunction] From fb84c07b49d33cd434cd6f9b53b55e97ee591bff Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 10:00:41 -0300 Subject: [PATCH 14/17] refactor validate_skill_metadata func --- .../pydantic_ai/toolsets/skills/__init__.py | 3 
+- .../pydantic_ai/toolsets/skills/_discovery.py | 48 ++++--- tests/test_skills.py | 123 ------------------ 3 files changed, 34 insertions(+), 140 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py index 76860b5bff..b18a732196 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py @@ -32,7 +32,7 @@ def add_skills_to_system_prompt() -> str: ``` """ -from pydantic_ai.toolsets.skills._discovery import discover_skills, parse_skill_md, validate_skill_metadata +from pydantic_ai.toolsets.skills._discovery import discover_skills, parse_skill_md from pydantic_ai.toolsets.skills._exceptions import ( SkillException, SkillNotFoundError, @@ -54,7 +54,6 @@ def add_skills_to_system_prompt() -> str: # Discovery 'discover_skills', 'parse_skill_md', - 'validate_skill_metadata', # Exceptions 'SkillException', 'SkillNotFoundError', diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py index dbbb87ce9b..33a82470be 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_discovery.py @@ -7,6 +7,7 @@ from __future__ import annotations import re +import warnings from collections.abc import Sequence from pathlib import Path from typing import Any @@ -26,50 +27,67 @@ RESERVED_WORDS = {'anthropic', 'claude'} -def validate_skill_metadata( +def _validate_skill_metadata( frontmatter: dict[str, Any], instructions: str, -) -> list[str]: +) -> bool: """Validate skill metadata against Anthropic's requirements. + Emits warnings for any validation issues found. + Args: frontmatter: Parsed YAML frontmatter. instructions: The skill instructions content. Returns: - List of validation warnings (empty if no issues). + True if validation passed with no issues, False if warnings were emitted. """ - warnings_list: list[str] = [] - + is_valid = True name = frontmatter.get('name', '') description = frontmatter.get('description', '') # Validate name format if name: - # Check length first to prevent regex on excessively long strings + # Check length first to avoid processing excessively long names (good practice) if len(name) > 64: - warnings_list.append(f"Skill name '{name}' exceeds 64 characters ({len(name)} chars)") - # Only run regex if name is reasonable length (defense in depth) + warnings.warn( + f"Skill name '{name}' exceeds 64 characters ({len(name)} chars) recommendation. 
Consider shortening it.", + UserWarning, + stacklevel=2, + ) + is_valid = False elif not SKILL_NAME_PATTERN.match(name): - warnings_list.append(f"Skill name '{name}' should contain only lowercase letters, numbers, and hyphens") + warnings.warn( + f"Skill name '{name}' should contain only lowercase letters, numbers, and hyphens", + UserWarning, + stacklevel=2, + ) + is_valid = False # Check for reserved words for reserved in RESERVED_WORDS: if reserved in name: - warnings_list.append(f"Skill name '{name}' contains reserved word '{reserved}'") + warnings.warn(f"Skill name '{name}' contains reserved word '{reserved}'", UserWarning, stacklevel=2) + is_valid = False # Validate description if description and len(description) > 1024: - warnings_list.append(f'Skill description exceeds 1024 characters ({len(description)} chars)') + warnings.warn( + f'Skill description exceeds 1024 characters ({len(description)} chars)', UserWarning, stacklevel=2 + ) + is_valid = False # Validate instructions length (Anthropic recommends under 500 lines) lines = instructions.split('\n') if len(lines) > 500: - warnings_list.append( + warnings.warn( f'SKILL.md body exceeds recommended 500 lines ({len(lines)} lines). ' - f'Consider splitting into separate resource files.' + f'Consider splitting into separate resource files.', + UserWarning, + stacklevel=2, ) + is_valid = False - return warnings_list + return is_valid def parse_skill_md(content: str) -> tuple[dict[str, Any], str]: @@ -293,7 +311,7 @@ def discover_skills( # Validate metadata if validate: - validate_skill_metadata(frontmatter, instructions) + _ = _validate_skill_metadata(frontmatter, instructions) # Discover resources and scripts resources = _discover_resources(skill_folder) diff --git a/tests/test_skills.py b/tests/test_skills.py index fabd0d3a61..e02f7205fb 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -15,7 +15,6 @@ SkillValidationError, discover_skills, parse_skill_md, - validate_skill_metadata, ) pytestmark = pytest.mark.anyio @@ -262,128 +261,6 @@ def test_parse_skill_md_complex_frontmatter() -> None: assert frontmatter['metadata']['category'] == 'test' -# ==================== Validation Tests ==================== - - -def test_validate_skill_metadata_valid() -> None: - """Test validation with valid metadata.""" - frontmatter = { - 'name': 'test-skill', - 'description': 'A valid test skill', - } - warnings = validate_skill_metadata(frontmatter, 'Content here.') - assert len(warnings) == 0 - - -def test_validate_skill_metadata_name_too_long() -> None: - """Test validation with name exceeding 64 characters.""" - frontmatter = { - 'name': 'a' * 65, - 'description': 'Test', - } - warnings = validate_skill_metadata(frontmatter, 'Content') - - assert len(warnings) == 1 - assert '64 characters' in warnings[0] - - -def test_validate_skill_metadata_invalid_name_format() -> None: - """Test validation with invalid name format.""" - frontmatter = { - 'name': 'Invalid_Name_With_Underscores', - 'description': 'Test', - } - warnings = validate_skill_metadata(frontmatter, 'Content') - - assert len(warnings) >= 1 - assert any('lowercase letters, numbers, and hyphens' in w for w in warnings) - - -def test_validate_skill_metadata_reserved_word() -> None: - """Test validation with reserved words in name.""" - frontmatter = { - 'name': 'anthropic-helper', - 'description': 'Test', - } - warnings = validate_skill_metadata(frontmatter, 'Content') - - assert len(warnings) >= 1 - assert any('reserved word' in w for w in warnings) - - -def 
test_validate_skill_metadata_description_too_long() -> None: - """Test validation with description exceeding 1024 characters.""" - frontmatter = { - 'name': 'test-skill', - 'description': 'x' * 1025, - } - warnings = validate_skill_metadata(frontmatter, 'Content') - - assert len(warnings) >= 1 - assert any('1024 characters' in w for w in warnings) - - -def test_validate_skill_metadata_instructions_too_long() -> None: - """Test validation with instructions exceeding 500 lines.""" - frontmatter = { - 'name': 'test-skill', - 'description': 'Test', - } - # Create content with 501 lines - instructions = '\n'.join([f'Line {i}' for i in range(501)]) - - warnings = validate_skill_metadata(frontmatter, instructions) - - assert len(warnings) >= 1 - assert any('500 lines' in w for w in warnings) - - -def test_validate_skill_metadata_multiple_issues() -> None: - """Test validation with multiple issues.""" - frontmatter = { - 'name': 'A' * 65, # Too long - 'description': 'x' * 1025, # Too long - } - instructions = '\n'.join([f'Line {i}' for i in range(501)]) # Too many lines - - warnings = validate_skill_metadata(frontmatter, instructions) - - # Should have warnings for name, description, and instructions - assert len(warnings) >= 3 - - -def test_validate_skill_metadata_good_naming_conventions() -> None: - """Test validation with valid naming conventions.""" - good_names = [ - 'processing-pdfs', - 'analyzing-spreadsheets', - 'test-skill-123', - 'pdf-processing', - 'skill-1', - ] - - for name in good_names: - frontmatter = {'name': name, 'description': 'Test'} - warnings = validate_skill_metadata(frontmatter, 'Content') - assert len(warnings) == 0, f"Name '{name}' should be valid" - - -def test_validate_skill_metadata_bad_naming_conventions() -> None: - """Test validation with invalid naming conventions.""" - bad_names = [ - 'Invalid_Name', # Underscores - 'InvalidName', # Capital letters - 'invalid name', # Spaces - 'invalid.name', # Periods - 'claude-tools', # Reserved word - ] - - for name in bad_names: - frontmatter = {'name': name, 'description': 'Test'} - warnings = validate_skill_metadata(frontmatter, 'Content') - assert len(warnings) > 0, f"Name '{name}' should trigger warnings" - - # ==================== Discovery Tests ==================== From d8dd95363eec8d4409b918b114b5e82b51a551ae Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 10:02:16 -0300 Subject: [PATCH 15/17] Update pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py index abdb0b5c86..9b74511734 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py @@ -310,8 +310,11 @@ async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunctio f"Script '{script_name}' timed out after {self._script_timeout} seconds" ) - # At this point, result should be set (timeout check passed) - assert result is not None + # At this point, result should be set; if not, treat as an execution error + if result is None: + raise SkillScriptExecutionError( + f"Script '{script_name}' did not complete execution; no result was returned" + ) # Decode output from bytes to string output = result.stdout.decode('utf-8', errors='replace') 
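The two patches above change how problems are reported: patch 14 routes skill-metadata validation through Python's `warnings` module instead of returning a list of strings, and patch 15 raises `SkillScriptExecutionError` when a script run yields no result instead of relying on an `assert`. Below is a minimal sketch of how a caller could surface the new discovery-time warnings; the `./skills` path and the print formatting are illustrative, while `discover_skills`, the `Skill.name`/`Skill.description` attributes, and the `UserWarning` behaviour come from this PR.

```python
import warnings

from pydantic_ai.toolsets.skills import discover_skills

# Validation issues (names over 64 chars, reserved words, descriptions over
# 1024 chars, SKILL.md bodies over 500 lines) are emitted as UserWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    skills = discover_skills(['./skills'], validate=True)

for skill in skills:
    print(f'{skill.name}: {skill.description}')
for w in caught:
    print(f'validation warning: {w.message}')
```
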
From bbd874134d2b204db9b9f452b85692de02c9d7c8 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 11:40:09 -0300 Subject: [PATCH 16/17] refactor skills instructions and make it customizable --- docs/skills.md | 56 +++++--- examples/pydantic_ai_examples/skills_agent.py | 6 +- .../pydantic_ai/agent/__init__.py | 25 +++- pydantic_ai_slim/pydantic_ai/mcp.py | 59 +++++++- .../pydantic_ai/toolsets/abstract.py | 15 +++ .../pydantic_ai/toolsets/skills/__init__.py | 6 +- .../pydantic_ai/toolsets/skills/_toolset.py | 97 +++++++------- tests/test_mcp.py | 56 +++++++- tests/test_skills.py | 126 +++++++++++++----- 9 files changed, 327 insertions(+), 119 deletions(-) diff --git a/docs/skills.md b/docs/skills.md index ab670afea0..ca4ec34caf 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -23,17 +23,13 @@ from pydantic_ai import Agent, SkillsToolset skills_toolset = SkillsToolset(directories=["./skills"]) # Create agent with skills +# Skills instructions are automatically injected via get_instructions() agent = Agent( model='openai:gpt-4o', instructions="You are a helpful research assistant.", toolsets=[skills_toolset] ) -# Add skills system prompt -@agent.system_prompt -async def add_skills_to_system_prompt() -> str: - return skills_toolset.get_skills_system_prompt() - # Use agent - skills tools are automatically available result = await agent.run( "What are the last 3 papers on arXiv about machine learning?" @@ -137,7 +133,7 @@ The toolset implements **progressive disclosure** - exposing information only wh ```markdown ┌─────────────────────────────────────────────────────────────┐ -│ System Prompt (via get_skills_system_prompt()) │ +│ System Prompt (automatically injected via toolset) │ │ ┌───────────────────────────────────────────────────────┐ │ │ │ Available Skills: │ │ │ │ - arxiv-search: Search arXiv for research papers │ │ @@ -182,7 +178,7 @@ Lists all available skills with their descriptions. **Returns**: Formatted markdown with skill names and descriptions -**When to use**: Optional - skills are already listed in the system prompt via `get_skills_system_prompt()`. Use only if the agent needs to re-check available skills dynamically. +**When to use**: Optional - skills are already listed in the system prompt automatically. Use only if the agent needs to re-check available skills dynamically. ### 2. 
`load_skill(skill_name)` @@ -287,21 +283,21 @@ from pydantic_ai.toolsets import SkillsToolset toolset = SkillsToolset( directories=["./skills", "./shared-skills"], - auto_discover=True, # Auto-discover skills on init (default: True) - validate=True, # Validate skill structure (default: True) - id="skills", # Unique identifier (default: "skills") - script_timeout=30, # Script execution timeout in seconds (default: 30) - python_executable=None, # Python executable path (default: sys.executable) + auto_discover=True, # Auto-discover skills on init (default: True) + validate=True, # Validate skill structure (default: True) + id="skills", # Unique identifier (default: "skills") + script_timeout=30, # Script execution timeout in seconds (default: 30) + python_executable=None, # Python executable path (default: sys.executable) + instruction_template=None, # Custom instruction template (default: None) ) ``` ### Key Methods -| Method | Description | -| ---------------------------- | ---------------------------------------------- | -| `get_skills_system_prompt()` | Get system prompt text with all skill metadata | -| `get_skill(name)` | Get a specific skill object by name | -| `refresh()` | Re-scan directories for skills | +| Method | Description | +| ----------------- | ----------------------------------------- | +| `get_skill(name)` | Get a specific skill object by name | +| `refresh()` | Re-scan directories for skills | ### Properties @@ -309,6 +305,27 @@ toolset = SkillsToolset( | -------- | ------------------------------------------------ | | `skills` | Dictionary of loaded skills (`dict[str, Skill]`) | +### Customizing Instructions + +You can customize the instruction template that gets injected into the agent's system prompt: + +```python +custom_template = """# My Custom Skills Section + +Available tools: +{skills_list} + +Use load_skill(name) to get details. +""" + +toolset = SkillsToolset( + directories=["./skills"], + instruction_template=custom_template +) +``` + +The template must include the `{skills_list}` placeholder, which will be replaced with the formatted list of available skills. 
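To make the substitution concrete, the snippet below shows roughly what the injected text looks like once `{skills_list}` is filled in. The two skills are illustrative only; the `- **name**: description` line format mirrors the one the toolset itself emits:

```python
custom_template = '# My Skills\n\n{skills_list}\n\nUse load_skill(name) for details.\n'

# Illustrative skills; in practice the toolset builds this list from each skill's SKILL.md metadata.
skills = {
    'arxiv-search': 'Search arXiv for research papers',
    'data-analyzer': 'Analyze CSV and JSON files',
}
skills_list = '\n'.join(f'- **{name}**: {desc}' for name, desc in sorted(skills.items()))

print(custom_template.format(skills_list=skills_list))
```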
+ ## Skill Discovery Skills can be discovered programmatically: @@ -511,10 +528,7 @@ async def main(): instructions="You are a research assistant.", toolsets=[skills_toolset] ) - - @agent.system_prompt - async def add_skills(): - return skills_toolset.get_skills_system_prompt() + # Skills instructions are automatically injected result = await agent.run( "Find the 3 most recent papers about large language models" diff --git a/examples/pydantic_ai_examples/skills_agent.py b/examples/pydantic_ai_examples/skills_agent.py index f0fdf784ea..a61ef113a8 100644 --- a/examples/pydantic_ai_examples/skills_agent.py +++ b/examples/pydantic_ai_examples/skills_agent.py @@ -23,17 +23,13 @@ async def main() -> None: skills_toolset = SkillsToolset(directories=[skills_dir]) # Create agent with skills + # Skills instructions are automatically injected via get_instructions() agent = Agent( model='openai:gpt-4o', instructions='You are a helpful research assistant.', toolsets=[skills_toolset], ) - # Add skills system prompt (includes skill descriptions and usage instructions) - @agent.system_prompt - async def add_skills_prompt() -> str: # pyright: ignore[reportUnusedFunction] - return skills_toolset.get_skills_system_prompt() - # Use agent - skills tools are available for the agent to call result = await agent.run('What are the main features of Pydantic AI framework?') print(f'\nResponse:\n{result.output}') diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py index b8d67d15d6..67fe004928 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py @@ -611,15 +611,32 @@ async def main(): instructions_literal, instructions_functions = self._get_instructions(additional_instructions=instructions) async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None: - parts = [ + parts: list[str | None] = [ instructions_literal, *[await func.run(run_context) for func in instructions_functions], ] - parts = [p for p in parts if p] - if not parts: + # Collect instructions from toolsets + toolset_instructions: list[str | None] = [] + + async def collect_toolset_instructions(ts: AbstractToolset[AgentDepsT]) -> None: + instruction = await ts.get_instructions(run_context) + if instruction: + toolset_instructions.append(instruction) + + # Use apply() to visit all leaf toolsets and collect their instructions + # We need to run this asynchronously for each toolset + toolsets_to_visit: list[AbstractToolset[AgentDepsT]] = [] + toolset.apply(toolsets_to_visit.append) + for ts in toolsets_to_visit: + await collect_toolset_instructions(ts) + + parts.extend(toolset_instructions) + + filtered_parts: list[str] = [p for p in parts if p] + if not filtered_parts: return None - return '\n\n'.join(parts).strip() + return '\n\n'.join(filtered_parts).strip() if isinstance(model_used, InstrumentedModel): instrumentation_settings = model_used.instrumentation_settings diff --git a/pydantic_ai_slim/pydantic_ai/mcp.py b/pydantic_ai_slim/pydantic_ai/mcp.py index 1420d0cfb0..3669c145ac 100644 --- a/pydantic_ai_slim/pydantic_ai/mcp.py +++ b/pydantic_ai_slim/pydantic_ai/mcp.py @@ -355,6 +355,15 @@ class MCPServer(AbstractToolset[Any], ABC): Set to `False` for servers that change resources dynamically without sending notifications. """ + use_server_instructions: bool + """Whether to include the server's instructions in the agent's system prompt. 
+ + When enabled, the instructions sent by the MCP server during initialization + will be automatically injected into the agent's system prompt via `get_instructions()`. + + Defaults to `False` for backward compatibility. + """ + _id: str | None _enter_lock: Lock = field(compare=False) @@ -385,6 +394,7 @@ def __init__( elicitation_callback: ElicitationFnT | None = None, cache_tools: bool = True, cache_resources: bool = True, + use_server_instructions: bool = False, *, id: str | None = None, client_info: mcp_types.Implementation | None = None, @@ -401,6 +411,7 @@ def __init__( self.elicitation_callback = elicitation_callback self.cache_tools = cache_tools self.cache_resources = cache_resources + self.use_server_instructions = use_server_instructions self.client_info = client_info self._id = id or tool_prefix @@ -466,14 +477,50 @@ def capabilities(self) -> ServerCapabilities: return self._server_capabilities @property + @deprecated( + 'The `instructions` property is deprecated. ' + 'Set `use_server_instructions=True` to automatically inject server instructions ' + 'into the agent system prompt. The `instructions` property will be removed in a future release.' + ) def instructions(self) -> str | None: - """Access the instructions sent by the MCP server during initialization.""" + """Access the instructions sent by the MCP server during initialization. + + .. deprecated:: + Set ``use_server_instructions=True`` to automatically inject server instructions + into the agent system prompt. The ``instructions`` property will be removed in a future release. + """ if not hasattr(self, '_instructions'): raise AttributeError( f'The `{self.__class__.__name__}.instructions` is only available after initialization.' ) return self._instructions + async def get_instructions(self, ctx: RunContext[Any]) -> str | None: + """Return instructions to inject into the agent's system prompt. + + If ``use_server_instructions`` is ``True``, returns the instructions + sent by the MCP server during initialization. Otherwise, returns ``None``. + + Args: + ctx: The run context for this agent run. + + Returns: + The server's instructions if ``use_server_instructions`` is enabled, + otherwise ``None``. + + Raises: + AttributeError: If ``use_server_instructions`` is ``True`` but the server + has not been initialized yet. + """ + if self.use_server_instructions: + # Access internal attribute directly to avoid triggering deprecation warning + if not hasattr(self, '_instructions'): + raise AttributeError( + f'The `{self.__class__.__name__}` instructions are only available after initialization.' + ) + return self._instructions + return None + async def list_tools(self) -> list[mcp_types.Tool]: """Retrieve tools that are currently active on the server. @@ -864,6 +911,7 @@ class MCPServerStdio(MCPServer): elicitation_callback: ElicitationFnT | None = None cache_tools: bool cache_resources: bool + use_server_instructions: bool def __init__( self, @@ -884,6 +932,7 @@ def __init__( elicitation_callback: ElicitationFnT | None = None, cache_tools: bool = True, cache_resources: bool = True, + use_server_instructions: bool = False, id: str | None = None, client_info: mcp_types.Implementation | None = None, ): @@ -908,6 +957,8 @@ def __init__( See [`MCPServer.cache_tools`][pydantic_ai.mcp.MCPServer.cache_tools]. cache_resources: Whether to cache the list of resources. See [`MCPServer.cache_resources`][pydantic_ai.mcp.MCPServer.cache_resources]. 
+ use_server_instructions: Whether to include the server's instructions in the agent's system prompt. + See [`MCPServer.use_server_instructions`][pydantic_ai.mcp.MCPServer.use_server_instructions]. id: An optional unique ID for the MCP server. An MCP server needs to have an ID in order to be used in a durable execution environment like Temporal, in which case the ID will be used to identify the server's activities within the workflow. client_info: Information describing the MCP client implementation. """ @@ -929,6 +980,7 @@ def __init__( elicitation_callback, cache_tools, cache_resources, + use_server_instructions, id=id, client_info=client_info, ) @@ -1031,6 +1083,7 @@ class _MCPServerHTTP(MCPServer): elicitation_callback: ElicitationFnT | None = None cache_tools: bool cache_resources: bool + use_server_instructions: bool def __init__( self, @@ -1051,6 +1104,7 @@ def __init__( elicitation_callback: ElicitationFnT | None = None, cache_tools: bool = True, cache_resources: bool = True, + use_server_instructions: bool = False, client_info: mcp_types.Implementation | None = None, **_deprecated_kwargs: Any, ): @@ -1075,6 +1129,8 @@ def __init__( See [`MCPServer.cache_tools`][pydantic_ai.mcp.MCPServer.cache_tools]. cache_resources: Whether to cache the list of resources. See [`MCPServer.cache_resources`][pydantic_ai.mcp.MCPServer.cache_resources]. + use_server_instructions: Whether to include the server's instructions in the agent's system prompt. + See [`MCPServer.use_server_instructions`][pydantic_ai.mcp.MCPServer.use_server_instructions]. client_info: Information describing the MCP client implementation. """ if 'sse_read_timeout' in _deprecated_kwargs: @@ -1108,6 +1164,7 @@ def __init__( elicitation_callback, cache_tools, cache_resources, + use_server_instructions, id=id, client_info=client_info, ) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py b/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py index 98d9cd224f..2064fccfcf 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py @@ -114,6 +114,21 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ """The tools that are available in this toolset.""" raise NotImplementedError() + async def get_instructions(self, ctx: RunContext[AgentDepsT]) -> str | None: + """Return instructions to inject into the agent's system prompt when this toolset is used. + + Override this method to provide custom instructions that help the agent understand + how to use the tools in this toolset effectively. + + Args: + ctx: The run context for this agent run. + + Returns: + Instructions string to add to the system prompt, or None if no instructions. 
+ """ + del ctx # unused in base implementation + return None + @abstractmethod async def call_tool( self, name: str, tool_args: dict[str, Any], ctx: RunContext[AgentDepsT], tool: ToolsetTool[AgentDepsT] diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py index b18a732196..6cbcad2d5f 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/__init__.py @@ -13,17 +13,13 @@ skills_toolset = SkillsToolset(directories=["./skills"]) # Create agent with skills as a toolset + # Skills instructions are automatically injected via get_instructions() agent = Agent( model='openai:gpt-4o', instructions="You are a helpful research assistant.", toolsets=[skills_toolset] ) - # Add skills system prompt to agent - @agent.system_prompt - def add_skills_to_system_prompt() -> str: - return skills_toolset.get_skills_system_prompt() - # Use agent - skills tools are available for the agent to call result = await agent.run( "What are the last 3 papers on arXiv about machine learning?" diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py index 9b74511734..b15309100d 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/skills/_toolset.py @@ -21,6 +21,31 @@ ) from ._types import Skill +# Default instruction template for skills system prompt +DEFAULT_INSTRUCTION_TEMPLATE = """# Skills + +You have access to skills that extend your capabilities. Skills are modular packages containing instructions, resources, and scripts for specialized tasks. + +## Available Skills + +The following skills are available to you. Use them when relevant to the task: + +{skills_list} + +## How to Use Skills + +**Progressive disclosure**: Load skill information only when needed. + +1. **When a skill is relevant to the current task**: Use `load_skill(skill_name)` to read the full instructions. +2. **For additional documentation**: Use `read_skill_resource(skill_name, resource_name)` to read FORMS.md, REFERENCE.md, or other resources. +3. **To execute skill scripts**: Use `run_skill_script(skill_name, script_name, args)` with appropriate command-line arguments. + +**Best practices**: +- Select skills based on task relevance and descriptions listed above +- Use progressive disclosure: load only what you need, when you need it, starting with load_skill +- Follow the skill's documented usage patterns and examples +""" + def _is_safe_path(base_path: Path, target_path: Path) -> bool: """Check if target_path is safely within base_path (no path traversal). @@ -65,10 +90,7 @@ class SkillsToolset(FunctionToolset): instructions="You are a helpful assistant.", toolsets=[skills_toolset] ) - - @agent.system_prompt - def add_skills_prompt() -> str: - return skills_toolset.get_skills_system_prompt() + # Skills instructions are automatically injected via get_instructions() ``` """ @@ -82,6 +104,7 @@ def __init__( script_timeout: int = 30, python_executable: str | Path | None = None, max_depth: int | None = 3, + instruction_template: str | None = None, ) -> None: """Initialize the skills toolset. @@ -95,6 +118,8 @@ def __init__( If None, uses sys.executable (default). max_depth: Maximum depth to search for SKILL.md files. None for unlimited. Default is 3 levels deep to prevent performance issues with large trees. + instruction_template: Custom instruction template for skills system prompt. 
+ Must include `{skills_list}` placeholder. If None, uses default template. """ super().__init__(id=id) @@ -103,6 +128,7 @@ def __init__( self._max_depth = max_depth self._script_timeout = script_timeout self._python_executable = str(python_executable) if python_executable else sys.executable + self._instruction_template = instruction_template self._skills: dict[str, Skill] = {} if auto_discover: @@ -330,61 +356,32 @@ async def run_skill_script( # noqa: D417 # pyright: ignore[reportUnusedFunctio except OSError as e: raise SkillScriptExecutionError(f"Failed to execute script '{script_name}': {e}") from e - def get_skills_system_prompt(self) -> str: - """Get the combined system prompt from all loaded skills. + async def get_instructions(self, ctx: RunContext[Any]) -> str | None: + """Return instructions to inject into the agent's system prompt. - This should be added to the agent's system prompt to provide - skill discovery and usage instructions. + Returns the skills system prompt containing all skill metadata + and usage guidance for the agent. - Following Anthropic's approach, this includes all skill metadata upfront - in the system prompt, enabling the agent to discover and select skills - without needing to call list_skills() first. + Args: + ctx: The run context for this agent run. Returns: - Formatted system prompt containing: - - All skill metadata (name + description) - - Instructions for using skill tools - - Progressive disclosure guidance + The skills system prompt, or None if no skills are loaded. """ if not self._skills: - return '' - - lines = [ - '# Skills', - '', - 'You have access to skills that extend your capabilities. Skills are modular packages', - 'containing instructions, resources, and scripts for specialized tasks.', - '', - '## Available Skills', - '', - 'The following skills are available to you. Use them when relevant to the task:', - '', - ] - - # List all skills with descriptions + return None + + # Build skills list + skills_list_lines: list[str] = [] for name, skill in sorted(self._skills.items()): - lines.append(f'- **{name}**: {skill.metadata.description}') + skills_list_lines.append(f'- **{name}**: {skill.metadata.description}') + skills_list = '\n'.join(skills_list_lines) - lines.extend( - [ - '', - '## How to Use Skills', - '', - '**Progressive disclosure**: Load skill information only when needed.', - '', - '1. **When a skill is relevant to the current task**: Use `load_skill(skill_name)` to read the full instructions.', - '2. **For additional documentation**: Use `read_skill_resource(skill_name, resource_name)` to read FORMS.md, REFERENCE.md, or other resources.', - '3. 
**To execute skill scripts**: Use `run_skill_script(skill_name, script_name, args)` with appropriate command-line arguments.', - '', - '**Best practices**:', - '- Select skills based on task relevance and descriptions listed above', - '- Use progressive disclosure: load only what you need, when you need it, starting with load_skill', - "- Follow the skill's documented usage patterns and examples", - '', - ] - ) + # Use custom template or default + template = self._instruction_template if self._instruction_template else DEFAULT_INSTRUCTION_TEMPLATE - return '\n'.join(lines) + # Format template with skills list + return template.format(skills_list=skills_list) @property def skills(self) -> dict[str, Skill]: diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 02bab17cc3..fafa402e0f 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -2049,9 +2049,61 @@ async def test_instructions(mcp_server: MCPServerStdio) -> None: with pytest.raises( AttributeError, match='The `MCPServerStdio.instructions` is only available after initialization.' ): - mcp_server.instructions + with pytest.warns(DeprecationWarning, match='The `instructions` property is deprecated'): + mcp_server.instructions # type: ignore[deprecated] async with mcp_server: - assert mcp_server.instructions == 'Be a helpful assistant.' + with pytest.warns(DeprecationWarning, match='The `instructions` property is deprecated'): + assert mcp_server.instructions == 'Be a helpful assistant.' # type: ignore[deprecated] + + +async def test_instructions_property_is_deprecated(mcp_server: MCPServerStdio) -> None: + """Test that accessing instructions property triggers deprecation warning.""" + async with mcp_server: + with pytest.warns(DeprecationWarning, match='Set `use_server_instructions=True`'): + _ = mcp_server.instructions # type: ignore[deprecated] + + +async def test_get_instructions_with_use_server_instructions_false(run_context: RunContext[int]) -> None: + """Test that get_instructions returns None when use_server_instructions is False.""" + server = MCPServerStdio('python', ['-m', 'tests.mcp_server'], use_server_instructions=False) + async with server: + instructions = await server.get_instructions(run_context) + assert instructions is None + + +async def test_get_instructions_with_use_server_instructions_true(run_context: RunContext[int]) -> None: + """Test that get_instructions returns server instructions when use_server_instructions is True.""" + server = MCPServerStdio('python', ['-m', 'tests.mcp_server'], use_server_instructions=True) + async with server: + instructions = await server.get_instructions(run_context) + assert instructions == 'Be a helpful assistant.' 
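Outside the test suite, the flag is set where the server is constructed; a brief usage sketch (the server module and model name are illustrative):

```python
from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerStdio

server = MCPServerStdio(
    'python',
    ['-m', 'my_company.mcp_server'],  # hypothetical MCP server module
    use_server_instructions=True,  # inject the server's instructions into the system prompt
)
agent = Agent('openai:gpt-5', toolsets=[server])
```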
+ + +async def test_get_instructions_raises_before_initialization(run_context: RunContext[int]) -> None: + """Test that get_instructions raises AttributeError when called before server is initialized.""" + server = MCPServerStdio('python', ['-m', 'tests.mcp_server'], use_server_instructions=True) + # Don't enter the context manager - server is not initialized + with pytest.raises(AttributeError, match='instructions are only available after initialization'): + await server.get_instructions(run_context) + + +async def test_mcp_instructions_injected_into_agent() -> None: + """Test that MCP server instructions are injected into agent when use_server_instructions=True.""" + from pydantic_ai.messages import ModelRequest + from pydantic_ai.models.test import TestModel + + server = MCPServerStdio('python', ['-m', 'tests.mcp_server'], use_server_instructions=True) + # Use TestModel with call_tools=[] to prevent tool calls that would fail + agent: Agent[None, str] = Agent(TestModel(call_tools=[]), toolsets=[server]) + + async with agent: + result = await agent.run('Hello') + + # Check that MCP instructions were included in the model request + model_requests = [m for m in result.all_messages() if isinstance(m, ModelRequest)] + assert any(m.instructions is not None and 'helpful assistant' in m.instructions for m in model_requests), ( + 'MCP server instructions should be injected when use_server_instructions=True' + ) async def test_client_info_passed_to_session() -> None: diff --git a/tests/test_skills.py b/tests/test_skills.py index e02f7205fb..41c78cebd1 100644 --- a/tests/test_skills.py +++ b/tests/test_skills.py @@ -722,37 +722,6 @@ async def test_run_skill_script_not_found(sample_skills_dir: Path) -> None: assert 'nonexistent' not in script_names -def test_get_skills_system_prompt(sample_skills_dir: Path) -> None: - """Test generating the system prompt.""" - toolset = SkillsToolset(directories=[sample_skills_dir]) - - prompt = toolset.get_skills_system_prompt() - - # Should include all skill names and descriptions - assert 'skill-one' in prompt - assert 'skill-two' in prompt - assert 'skill-three' in prompt - assert 'First test skill for basic operations' in prompt - assert 'Second test skill with resources' in prompt - assert 'Third test skill with executable scripts' in prompt - - # Should include usage instructions - assert 'load_skill' in prompt - assert 'read_skill_resource' in prompt - assert 'run_skill_script' in prompt - - # Should include progressive disclosure guidance - assert 'Progressive disclosure' in prompt or 'progressive disclosure' in prompt - - -def test_get_skills_system_prompt_empty() -> None: - """Test system prompt with no skills.""" - toolset = SkillsToolset(directories=[], auto_discover=False) - - prompt = toolset.get_skills_system_prompt() - assert prompt == '' - - def test_toolset_refresh(sample_skills_dir: Path) -> None: """Test refreshing skills.""" toolset = SkillsToolset(directories=[sample_skills_dir]) @@ -775,3 +744,98 @@ def test_toolset_refresh(sample_skills_dir: Path) -> None: assert len(toolset.skills) == initial_count + 1 assert 'skill-four' in toolset.skills + + +async def test_get_instructions_returns_system_prompt(sample_skills_dir: Path) -> None: + """Test that get_instructions() returns the skills system prompt.""" + from pydantic_ai.models.test import TestModel + from pydantic_ai.tools import RunContext + from pydantic_ai.usage import RunUsage + + toolset = SkillsToolset(directories=[sample_skills_dir]) + + # Create a minimal run context + ctx = 
RunContext[None](deps=None, model=TestModel(), usage=RunUsage()) + + instructions = await toolset.get_instructions(ctx) + + assert instructions is not None + # Should include all skill names and descriptions + assert 'skill-one' in instructions + assert 'skill-two' in instructions + assert 'skill-three' in instructions + assert 'First test skill for basic operations' in instructions + assert 'Second test skill with resources' in instructions + assert 'Third test skill with executable scripts' in instructions + # Should include usage instructions + assert 'load_skill' in instructions + assert 'read_skill_resource' in instructions + assert 'run_skill_script' in instructions + # Should include progressive disclosure guidance + assert 'Progressive disclosure' in instructions or 'progressive disclosure' in instructions + + +async def test_get_instructions_empty_toolset() -> None: + """Test that get_instructions() returns None for empty toolset.""" + from pydantic_ai.models.test import TestModel + from pydantic_ai.tools import RunContext + from pydantic_ai.usage import RunUsage + + toolset = SkillsToolset(directories=[], auto_discover=False) + + ctx = RunContext[None](deps=None, model=TestModel(), usage=RunUsage()) + + instructions = await toolset.get_instructions(ctx) + assert instructions is None + + +async def test_get_instructions_with_custom_template(sample_skills_dir: Path) -> None: + """Test get_instructions uses custom template when provided.""" + from pydantic_ai.models.test import TestModel + from pydantic_ai.tools import RunContext + from pydantic_ai.usage import RunUsage + + custom_template = """# My Custom Skills + +Available: +{skills_list} + +Use load_skill(name) for details. +""" + + toolset = SkillsToolset(directories=[sample_skills_dir], instruction_template=custom_template) + + ctx = RunContext[None](deps=None, model=TestModel(), usage=RunUsage()) + + instructions = await toolset.get_instructions(ctx) + + assert instructions is not None + # Should use custom template + assert '# My Custom Skills' in instructions + assert 'Available:' in instructions + assert 'Use load_skill(name) for details.' 
in instructions + # Should still have skill list + assert 'skill-one' in instructions + assert 'skill-two' in instructions + assert 'skill-three' in instructions + # Should NOT have default template text + assert 'Progressive disclosure' not in instructions + + +async def test_skills_instructions_injected_into_agent(sample_skills_dir: Path) -> None: + """Test that SkillsToolset instructions are automatically injected into agent runs.""" + from pydantic_ai import Agent + from pydantic_ai.messages import ModelRequest + from pydantic_ai.models.test import TestModel + + toolset = SkillsToolset(directories=[sample_skills_dir]) + agent: Agent[None, str] = Agent(TestModel(), toolsets=[toolset]) + + result = await agent.run('Hello') + + # Check that the instructions were included in the model request + # The instructions should be in the ModelRequest.instructions field + model_requests = [m for m in result.all_messages() if isinstance(m, ModelRequest)] + assert any(m.instructions is not None and 'skill-one' in m.instructions for m in model_requests), ( + 'Skills instructions should be injected into model request' + ) From 9dbc4be9a4c23c1b80f09cd239eeec5d597b4b43 Mon Sep 17 00:00:00 2001 From: Douglas Trajano Date: Sat, 20 Dec 2025 13:17:21 -0300 Subject: [PATCH 17/17] update docs --- docs/api/mcp.md | 6 + docs/api/skills.md | 29 ++ docs/mcp/client.md | 33 +- docs/skills.md | 549 --------------------------------- docs/skills/creating-skills.md | 282 +++++++++++++++++ docs/skills/overview.md | 128 ++++++++ docs/skills/using-skills.md | 302 ++++++++++++++++++ mkdocs.yml | 6 +- 8 files changed, 783 insertions(+), 552 deletions(-) create mode 100644 docs/api/skills.md delete mode 100644 docs/skills.md create mode 100644 docs/skills/creating-skills.md create mode 100644 docs/skills/overview.md create mode 100644 docs/skills/using-skills.md diff --git a/docs/api/mcp.md b/docs/api/mcp.md index bc9f4592dd..3309ec03e7 100644 --- a/docs/api/mcp.md +++ b/docs/api/mcp.md @@ -1,3 +1,9 @@ # `pydantic_ai.mcp` ::: pydantic_ai.mcp + options: + members: + - MCPServer + - MCPServerStreamableHTTP + - MCPServerSSE + - MCPServerStdio diff --git a/docs/api/skills.md b/docs/api/skills.md new file mode 100644 index 0000000000..23f3785152 --- /dev/null +++ b/docs/api/skills.md @@ -0,0 +1,29 @@ +# Skills API Reference + +API reference for the Skills framework types and functions. + +::: pydantic_ai.toolsets.skills.SkillsToolset + options: + members: + - __init__ + - get_skill + - refresh + - skills + +::: pydantic_ai.toolsets.skills.Skill + +::: pydantic_ai.toolsets.skills.SkillMetadata + +::: pydantic_ai.toolsets.skills.SkillResource + +::: pydantic_ai.toolsets.skills.SkillScript + +::: pydantic_ai.toolsets.skills.discover_skills + +::: pydantic_ai.toolsets.skills.SkillNotFoundError + +::: pydantic_ai.toolsets.skills.SkillValidationError + +::: pydantic_ai.toolsets.skills.SkillResourceLoadError + +::: pydantic_ai.toolsets.skills.SkillScriptExecutionError diff --git a/docs/mcp/client.md b/docs/mcp/client.md index 817ab21f92..e3f1298985 100644 --- a/docs/mcp/client.md +++ b/docs/mcp/client.md @@ -340,9 +340,38 @@ agent = Agent('openai:gpt-5', toolsets=[weather_server, calculator_server]) ## Server Instructions -MCP servers can provide instructions during initialization that give context about how to best interact with the server's tools. These instructions are accessible via the [`instructions`][pydantic_ai.mcp.MCPServer.instructions] property after the server connection is established. 
+MCP servers can provide instructions during initialization that give context about how to best interact with the server's tools. -```python {title="mcp_server_instructions.py"} +### Automatic Instruction Injection (Recommended) + +The recommended approach is to enable automatic instruction injection when creating the MCP server. This automatically includes the server's instructions in the agent's system prompt via the toolset's `get_instructions()` method: + +```python {title="mcp_server_instructions_auto.py"} +from pydantic_ai import Agent +from pydantic_ai.mcp import MCPServerStreamableHTTP + +server = MCPServerStreamableHTTP( + 'http://localhost:8000/mcp', + use_server_instructions=True # (1)! +) +agent = Agent('openai:gpt-5', toolsets=[server]) + +async def main(): + result = await agent.run('What is 7 plus 5?') + print(result.output) + #> The answer is 12. +``` + +1. Enable automatic instruction injection. The server's instructions will be included in the system prompt automatically. + +### Manual Instruction Access (Deprecated) + +!!! warning "Deprecated Approach" + Manually accessing `server.instructions` is deprecated in favor of using `use_server_instructions=True`. The `instructions` property will be removed in a future version. + +You can still manually access instructions via the [`instructions`][pydantic_ai.mcp.MCPServer.instructions] property after the server connection is established: + +```python {title="mcp_server_instructions_manual.py"} from pydantic_ai import Agent from pydantic_ai.mcp import MCPServerStreamableHTTP diff --git a/docs/skills.md b/docs/skills.md deleted file mode 100644 index ca4ec34caf..0000000000 --- a/docs/skills.md +++ /dev/null @@ -1,549 +0,0 @@ -# Skills - -A standardized, composable framework for building and managing Agent Skills. Skills are modular collections of instructions, scripts, tools, and resources that enable AI agents to progressively discover, load, and execute specialized capabilities for domain-specific tasks. - -## What are Agent Skills? - -Agent Skills are **modular packages** that extend your agent's capabilities without hardcoding every possible feature into your agent's instructions. Think of them as plugins that agents can discover and load on-demand. - -Key benefits: - -- **🔍 Progressive Discovery**: Agents list available skills and load only what they need -- **📦 Modular Design**: Each skill is a self-contained directory with instructions and resources -- **🛠️ Script Execution**: Skills can include executable Python scripts -- **📚 Resource Management**: Support for additional documentation and data files -- **🚀 Easy Integration**: Simple toolset interface that works with any Pydantic AI agent - -## Quick Example - -```python -from pydantic_ai import Agent, SkillsToolset - -# Initialize Skills Toolset with skill directories -skills_toolset = SkillsToolset(directories=["./skills"]) - -# Create agent with skills -# Skills instructions are automatically injected via get_instructions() -agent = Agent( - model='openai:gpt-4o', - instructions="You are a helpful research assistant.", - toolsets=[skills_toolset] -) - -# Use agent - skills tools are automatically available -result = await agent.run( - "What are the last 3 papers on arXiv about machine learning?" -) -print(result.output) -``` - -!!! note "Alternative Import" - You can also import `SkillsToolset` from `pydantic_ai.toolsets`: - ```python - from pydantic_ai.toolsets import SkillsToolset - ``` - -## How It Works - -1. 
**Discovery**: The toolset scans specified directories for skills (folders with `SKILL.md` files) -2. **Registration**: Skills are registered as tools on your agent -3. **Progressive Loading**: Agents can: - - List all available skills with `list_skills()` (optional, as skills are in system prompt) - - Load detailed instructions with `load_skill(name)` - - Read additional resources with `read_skill_resource(skill_name, resource_name)` - - Execute scripts with `run_skill_script(skill_name, script_name, args)` - -## Creating Skills - -### Basic Skill Structure - -Every skill must have at minimum a `SKILL.md` file: - -```markdown -my-skill/ -├── SKILL.md # Required: Instructions and metadata -├── scripts/ # Optional: Executable scripts -│ └── my_script.py -└── resources/ # Optional: Additional files -├── reference.md -└── data.json -``` - -### SKILL.md Format - -The `SKILL.md` file uses **YAML frontmatter** for metadata and **Markdown** for instructions: - -````markdown ---- -name: arxiv-search -description: Search arXiv for research papers -version: 1.0.0 -author: Your Name -tags: [papers, arxiv, academic] ---- - -# arXiv Search Skill - -## When to Use - -Use this skill when you need to: - -- Find recent preprints in physics, math, or computer science -- Search for papers not yet published in journals -- Access cutting-edge research - -## Instructions - -To search arXiv, use the `run_skill_script` tool with: - -1. **skill_name**: "arxiv-search" -2. **script_name**: "arxiv_search" -3. **args**: Your search query and options - -## Example - -```python -run_skill_script( - skill_name="arxiv-search", - script_name="arxiv_search", - args=["machine learning", "--max-papers", "5"] -) -``` -```` - -### Required Fields - -- `name`: Unique identifier (lowercase letters, numbers, and hyphens only) -- `description`: Brief summary (appears in skill listings, max 1024 characters) - -### Naming Conventions - -Following Anthropic's skill naming conventions: - -| Requirement | Example | -| ------------------ | -------------------------------------- | -| Lowercase only | `arxiv-search` ✅, `ArxivSearch` ❌ | -| Hyphens for spaces | `web-research` ✅, `web_research` ❌ | -| Max 64 characters | `data-analyzer` ✅ | -| No reserved words | Avoid "anthropic" or "claude" in names | - -## Progressive Disclosure - -The toolset implements **progressive disclosure** - exposing information only when needed: - -```markdown -┌─────────────────────────────────────────────────────────────┐ -│ System Prompt (automatically injected via toolset) │ -│ ┌───────────────────────────────────────────────────────┐ │ -│ │ Available Skills: │ │ -│ │ - arxiv-search: Search arXiv for research papers │ │ -│ │ - web-research: Research topics on the web │ │ -│ │ - data-analyzer: Analyze CSV and JSON files │ │ -│ └───────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ - │ - ▼ - Agent sees skill names & descriptions - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ load_skill("arxiv-search") │ -│ ┌───────────────────────────────────────────────────────┐ │ -│ │ Returns full SKILL.md instructions: │ │ -│ │ - When to use │ │ -│ │ - Step-by-step guide │ │ -│ │ - Example invocations │ │ -│ │ - Available resources and scripts │ │ -│ └───────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ - │ - ▼ - Agent loads detailed instructions when needed -``` - -This approach: - -- **Reduces initial 
context size** - Only metadata is in the system prompt -- **Lets agents discover capabilities dynamically** - Load what's needed -- **Improves token efficiency** - Don't pay for unused instructions -- **Scales to many skills** - Add hundreds of skills without bloating prompts - -## The Four Tools - -The `SkillsToolset` provides four tools to agents: - -### 1. `list_skills()` - -Lists all available skills with their descriptions. - -**Returns**: Formatted markdown with skill names and descriptions - -**When to use**: Optional - skills are already listed in the system prompt automatically. Use only if the agent needs to re-check available skills dynamically. - -### 2. `load_skill(skill_name)` - -Loads the complete instructions for a specific skill. - -**Parameters**: - -- `skill_name` (str) - Name of the skill to load - -**Returns**: Full SKILL.md content including detailed instructions, available resources, and scripts - -**When to use**: When the agent needs detailed instructions for using a skill - -### 3. `read_skill_resource(skill_name, resource_name)` - -Reads additional resource files from a skill. - -**Parameters**: - -- `skill_name` (str) - Name of the skill -- `resource_name` (str) - Resource filename (e.g., "FORMS.md") - -**Returns**: Content of the resource file - -**When to use**: When a skill references additional documentation or data files - -### 4. `run_skill_script(skill_name, script_name, args)` - -Executes a Python script from a skill. - -**Parameters**: - -- `skill_name` (str) - Name of the skill -- `script_name` (str) - Script name without .py extension -- `args` (list[str], optional) - Command-line arguments - -**Returns**: Script output (stdout and stderr combined) - -**When to use**: When a skill needs to execute custom code - -## Adding Scripts to Skills - -Scripts enable skills to perform custom operations that aren't available as standard agent tools. 
- -### Script Location - -Place scripts in either: - -- `scripts/` subdirectory (recommended) -- Directly in the skill folder - -```markdown -my-skill/ -├── SKILL.md -└── scripts/ - ├── process_data.py - └── fetch_info.py -``` - -### Writing Scripts - -Scripts should: - -- Accept command-line arguments via `sys.argv` -- Print output to stdout -- Exit with code 0 on success, non-zero on error -- Handle errors gracefully - -```python -#!/usr/bin/env python3 -"""Example skill script.""" - -import sys -import json - -def main(): - if len(sys.argv) < 2: - print("Usage: process_data.py ") - sys.exit(1) - - input_data = sys.argv[1] - - try: - # Process the input - result = {"processed": input_data.upper()} - print(json.dumps(result, indent=2)) - - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - sys.exit(1) - -if __name__ == "__main__": - main() -``` - -## SkillsToolset API - -### Initialization - -```python -from pydantic_ai.toolsets import SkillsToolset - -toolset = SkillsToolset( - directories=["./skills", "./shared-skills"], - auto_discover=True, # Auto-discover skills on init (default: True) - validate=True, # Validate skill structure (default: True) - id="skills", # Unique identifier (default: "skills") - script_timeout=30, # Script execution timeout in seconds (default: 30) - python_executable=None, # Python executable path (default: sys.executable) - instruction_template=None, # Custom instruction template (default: None) -) -``` - -### Key Methods - -| Method | Description | -| ----------------- | ----------------------------------------- | -| `get_skill(name)` | Get a specific skill object by name | -| `refresh()` | Re-scan directories for skills | - -### Properties - -| Property | Description | -| -------- | ------------------------------------------------ | -| `skills` | Dictionary of loaded skills (`dict[str, Skill]`) | - -### Customizing Instructions - -You can customize the instruction template that gets injected into the agent's system prompt: - -```python -custom_template = """# My Custom Skills Section - -Available tools: -{skills_list} - -Use load_skill(name) to get details. -""" - -toolset = SkillsToolset( - directories=["./skills"], - instruction_template=custom_template -) -``` - -The template must include the `{skills_list}` placeholder, which will be replaced with the formatted list of available skills. 
- -## Skill Discovery - -Skills can be discovered programmatically: - -```python -from pydantic_ai.toolsets import discover_skills - -skills = discover_skills( - directories=["./skills"], - validate=True -) - -for skill in skills: - print(f"{skill.name}: {skill.metadata.description}") - print(f" Resources: {[r.name for r in skill.resources]}") - print(f" Scripts: {[s.name for s in skill.scripts]}") -``` - -## Type Reference - -### Skill - -```python -from pydantic_ai import SkillsToolset -from pydantic_ai.toolsets.skills import Skill - -skill = Skill( - name="my-skill", - path=Path("./skills/my-skill"), - metadata=SkillMetadata(...), - content="# Instructions...", - resources=[SkillResource(...)], - scripts=[SkillScript(...)], -) -``` - -### SkillMetadata - -```python -from pydantic_ai.toolsets.skills import SkillMetadata - -metadata = SkillMetadata( - name="my-skill", - description="My skill description", - extra={"version": "1.0.0", "author": "Me"} -) -``` - -### SkillResource - -```python -from pydantic_ai.toolsets.skills import SkillResource - -resource = SkillResource( - name="FORMS.md", - path=Path("./skills/my-skill/FORMS.md"), - content=None, # Lazy-loaded -) -``` - -### SkillScript - -```python -from pydantic_ai.toolsets.skills import SkillScript - -script = SkillScript( - name="process_data", - path=Path("./skills/my-skill/scripts/process_data.py"), - skill_name="my-skill", -) -``` - -## Security Considerations - -!!! warning "Use Skills from Trusted Sources Only" - - Skills provide AI agents with new capabilities through instructions and code. While this makes them powerful, it also means a malicious skill can direct agents to invoke tools or execute code in ways that don't match the skill's stated purpose. - - If you must use a skill from an untrusted or unknown source, exercise extreme caution and thoroughly audit it before use. Depending on what access agents have when executing the skill, malicious skills could lead to data exfiltration, unauthorized system access, or other security risks. - -The toolset includes security measures: - -- **Path traversal prevention**: Resources and scripts are validated to stay within the skill directory -- **Script timeout**: Scripts have a configurable timeout (default: 30 seconds) -- **Subprocess execution**: Scripts run in a separate process, but with the same OS-level permissions as your agent process (this is not a security sandbox) - -## Complete Example - -Here's a complete example with a skill that searches for research papers: - -### Skill Structure - -```markdown -skills/ -└── arxiv-search/ - ├── SKILL.md - └── scripts/ - └── arxiv_search.py -``` - -### SKILL.md - -````markdown ---- -name: arxiv-search -description: Search arXiv for research papers by query ---- - -# arXiv Search - -Search the arXiv preprint server for academic papers. 
- -## Usage - -Use `run_skill_script` with: - -- **script_name**: "arxiv_search" -- **args**: ["your search query", "--max-papers", "5"] - -## Example - -To find papers about transformers: - -```python -run_skill_script( - skill_name="arxiv-search", - script_name="arxiv_search", - args=["transformers attention mechanism", "--max-papers", "3"] -) -``` -```` - -### arxiv_search.py - -```python -#!/usr/bin/env python3 -"""Search arXiv for papers.""" - -import argparse -import urllib.request -import urllib.parse -import xml.etree.ElementTree as ET - -def search_arxiv(query: str, max_results: int = 5) -> list[dict]: - """Search arXiv API.""" - base_url = "http://export.arxiv.org/api/query" - params = { - "search_query": f"all:{query}", - "start": 0, - "max_results": max_results, - "sortBy": "submittedDate", - "sortOrder": "descending", - } - url = f"{base_url}?{urllib.parse.urlencode(params)}" - - with urllib.request.urlopen(url) as response: - data = response.read() - - root = ET.fromstring(data) - ns = {"atom": "http://www.w3.org/2005/Atom"} - - results = [] - for entry in root.findall("atom:entry", ns): - title = entry.find("atom:title", ns).text.strip() - summary = entry.find("atom:summary", ns).text.strip()[:200] - link = entry.find("atom:id", ns).text - results.append({"title": title, "summary": summary, "link": link}) - - return results - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("query", help="Search query") - parser.add_argument("--max-papers", type=int, default=5) - args = parser.parse_args() - - results = search_arxiv(args.query, args.max_papers) - - for i, paper in enumerate(results, 1): - print(f"{i}. {paper['title']}") - print(f" {paper['summary']}...") - print(f" Link: {paper['link']}") - print() - -if __name__ == "__main__": - main() -``` - -### Agent Code - -```python -import asyncio -from pydantic_ai import Agent, SkillsToolset - -async def main(): - skills_toolset = SkillsToolset(directories=["./skills"]) - - agent = Agent( - model='openai:gpt-4o', - instructions="You are a research assistant.", - toolsets=[skills_toolset] - ) - # Skills instructions are automatically injected - - result = await agent.run( - "Find the 3 most recent papers about large language models" - ) - print(result.output) - -if __name__ == "__main__": - asyncio.run(main()) -``` - -## References - -This implementation is inspired by: - -- [langchain-ai/deepagents](https://github.com/langchain-ai/deepagents/tree/master) -- [vstorm-co/pydantic-deepagents](https://github.com/vstorm-co/pydantic-deepagents/tree/main) -- [Introducing Agent Skills | Anthropic](https://www.anthropic.com/news/agent-skills) -- [Using skills with Deep Agents | LangChain](https://blog.langchain.com/using-skills-with-deep-agents/) diff --git a/docs/skills/creating-skills.md b/docs/skills/creating-skills.md new file mode 100644 index 0000000000..7d6f52764e --- /dev/null +++ b/docs/skills/creating-skills.md @@ -0,0 +1,282 @@ +# Creating Skills + +This guide covers everything you need to know about creating your own Agent Skills, from basic structure to advanced patterns. 
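While iterating on a new skill, it can help to discover it programmatically and confirm it parses before wiring it into an agent. A small sketch, assuming the skill lives under `./skills`:

```python
from pydantic_ai.toolsets import discover_skills

# validate=True checks the skill structure (naming, description length, etc.) during discovery.
skills = discover_skills(directories=['./skills'], validate=True)
for skill in skills:
    print(f'{skill.name}: {skill.metadata.description}')
    print(f'  scripts: {[s.name for s in skill.scripts]}')
```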
+ +## Skill Structure + +Every skill must have at minimum a `SKILL.md` file: + +```markdown +my-skill/ +├── SKILL.md # Required: Instructions and metadata +├── scripts/ # Optional: Executable scripts +│ └── my_script.py +└── resources/ # Optional: Additional files + ├── reference.md + └── data.json +``` + +## SKILL.md Format + +The `SKILL.md` file uses **YAML frontmatter** for metadata and **Markdown** for instructions: + +````markdown {title="SKILL.md"} +--- +name: arxiv-search +description: Search arXiv for research papers +version: 1.0.0 +author: Your Name +tags: [papers, arxiv, academic] +--- + +# arXiv Search Skill + +## When to Use + +Use this skill when you need to: +- Find recent preprints in physics, math, or computer science +- Search for papers not yet published in journals +- Access cutting-edge research + +## Instructions + +To search arXiv, use the `run_skill_script` tool with: + +1. **skill_name**: "arxiv-search" +2. **script_name**: "arxiv_search" +3. **args**: Your search query and options + +## Example + +```python +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["machine learning", "--max-papers", "5"] +) +``` +```` + +## Required Fields + +The YAML frontmatter must include: + +- `name`: Unique identifier (lowercase letters, numbers, and hyphens only) +- `description`: Brief summary (appears in skill listings, max 1024 characters) + +All other fields are optional and stored in the `extra` dictionary of [`SkillMetadata`](../api/skills.md#pydantic_ai.toolsets.skills.SkillMetadata). + +## Naming Conventions + +Following Anthropic's skill naming conventions: + +| Requirement | Example | +|------------|---------| +| Lowercase only | `arxiv-search` ✅, `ArxivSearch` ❌ | +| Hyphens for spaces | `web-research` ✅, `web_research` ❌ | +| Max 64 characters | `data-analyzer` ✅ | +| No reserved words | Avoid "anthropic" or "claude" in names | + +## Adding Scripts to Skills + +Scripts enable skills to perform custom operations that aren't available as standard agent tools. + +### Script Location + +Place scripts in either: + +- `scripts/` subdirectory (recommended) +- Directly in the skill folder + +```markdown +my-skill/ +├── SKILL.md +└── scripts/ + ├── process_data.py + └── fetch_info.py +``` + +### Writing Scripts + +Scripts should: + +- Accept command-line arguments via `sys.argv` +- Print output to stdout +- Exit with code 0 on success, non-zero on error +- Handle errors gracefully + +```python {title="process_data.py"} +#!/usr/bin/env python3 +"""Example skill script.""" + +import sys +import json + +def main(): + if len(sys.argv) < 2: + print("Usage: process_data.py ") + sys.exit(1) + + input_data = sys.argv[1] + + try: + # Process the input + result = {"processed": input_data.upper()} + print(json.dumps(result, indent=2)) + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + +if __name__ == "__main__": + main() +``` + +## Complete Example + +Here's a complete example with a skill that searches for research papers: + +```markdown +skills/ +└── arxiv-search/ + ├── SKILL.md + └── scripts/ + └── arxiv_search.py +``` + +````markdown {title="SKILL.md"} +--- +name: arxiv-search +description: Search arXiv for research papers by query +--- + +# arXiv Search + +Search the arXiv preprint server for academic papers. 
+ +## Usage + +Use `run_skill_script` with: +- **script_name**: "arxiv_search" +- **args**: ["your search query", "--max-papers", "5"] + +## Example + +To find papers about transformers: + +```python +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["transformers attention mechanism", "--max-papers", "3"] +) +``` +```` + +```python {title="arxiv_search.py"} +#!/usr/bin/env python3 +"""Search arXiv for papers.""" + +import argparse +import urllib.request +import urllib.parse +import xml.etree.ElementTree as ET + +def search_arxiv(query: str, max_results: int = 5) -> list[dict]: + """Search arXiv API.""" + base_url = "http://export.arxiv.org/api/query" + params = { + "search_query": f"all:{query}", + "start": 0, + "max_results": max_results, + "sortBy": "submittedDate", + "sortOrder": "descending", + } + url = f"{base_url}?{urllib.parse.urlencode(params)}" + + with urllib.request.urlopen(url) as response: + data = response.read() + + root = ET.fromstring(data) + ns = {"atom": "http://www.w3.org/2005/Atom"} + + results = [] + for entry in root.findall("atom:entry", ns): + title = entry.find("atom:title", ns).text.strip() + summary = entry.find("atom:summary", ns).text.strip()[:200] + link = entry.find("atom:id", ns).text + results.append({"title": title, "summary": summary, "link": link}) + + return results + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("query", help="Search query") + parser.add_argument("--max-papers", type=int, default=5) + args = parser.parse_args() + + results = search_arxiv(args.query, args.max_papers) + + for i, paper in enumerate(results, 1): + print(f"{i}. {paper['title']}") + print(f" {paper['summary']}...") + print(f" Link: {paper['link']}") + print() + +if __name__ == "__main__": + main() +``` + +```python {title="agent_example.py"} +import asyncio +from pydantic_ai import Agent, SkillsToolset + +async def main(): + skills_toolset = SkillsToolset(directories=["./skills"]) + + agent = Agent( + model='openai:gpt-4o', + instructions="You are a research assistant.", + toolsets=[skills_toolset] + ) + # Skills instructions are automatically injected + + result = await agent.run( + "Find the 3 most recent papers about large language models" + ) + print(result.output) + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## Best Practices + +### Documentation + +- Write clear, concise descriptions (they appear in skill listings) +- Include "When to Use" sections to guide agents +- Provide multiple examples showing different usage patterns +- Document all script arguments and expected output formats + +### Scripts + +- Keep scripts focused on a single responsibility +- Use descriptive script names (e.g., `search_papers.py` not `script1.py`) +- Include helpful error messages +- Return structured output (JSON) when possible +- Test scripts independently before adding to skills + +### Resources + +- Use the `resources/` directory for reference documentation +- Keep resource files small and focused +- Use clear, descriptive filenames +- Reference resources in your `SKILL.md` instructions + +### Organization + +- Group related skills in subdirectories +- Use consistent naming across your skills +- Version your skills in metadata for tracking +- Document dependencies in `SKILL.md` diff --git a/docs/skills/overview.md b/docs/skills/overview.md new file mode 100644 index 0000000000..1f483082f7 --- /dev/null +++ b/docs/skills/overview.md @@ -0,0 +1,128 @@ +# Skills + +A standardized, composable framework for 
building and managing Agent Skills. Skills are modular collections of instructions, scripts, tools, and resources that enable AI agents to progressively discover, load, and execute specialized capabilities for domain-specific tasks. + +## What are Agent Skills? + +Agent Skills are **modular packages** that extend your agent's capabilities without hardcoding every possible feature into your agent's instructions. Think of them as plugins that agents can discover and load on-demand. + +Key benefits: + +- **🔍 Progressive Discovery**: Skills are listed in the system prompt; agents load detailed instructions only when needed +- **📦 Modular Design**: Each skill is a self-contained directory with instructions and resources +- **🛠️ Script Execution**: Skills can include executable Python scripts +- **📚 Resource Management**: Support for additional documentation and data files +- **🚀 Easy Integration**: Simple toolset interface that works with any Pydantic AI agent +- **⚡ Automatic Injection**: Skill metadata is automatically added to the agent's system prompt via `get_instructions()` + +## Quick Example + +```python {title="agent_with_skills.py"} +from pydantic_ai import Agent, SkillsToolset + +# Initialize Skills Toolset with skill directories +skills_toolset = SkillsToolset(directories=["./skills"]) + +# Create agent with skills +# Skills instructions are automatically injected via get_instructions() +agent = Agent( + model='openai:gpt-4o', + instructions="You are a helpful research assistant.", + toolsets=[skills_toolset] +) + +# Use agent - skills tools are automatically available +result = await agent.run( + "What are the last 3 papers on arXiv about machine learning?" +) +print(result.output) +``` + +!!! note "Alternative Import" + You can also import `SkillsToolset` from `pydantic_ai.toolsets`: + ```python + from pydantic_ai.toolsets import SkillsToolset + ``` + +## How It Works + +1. **Discovery**: The toolset scans specified directories for skills (folders with `SKILL.md` files) +2. **Automatic Injection**: Skill names and descriptions are automatically injected into the agent's system prompt via the toolset's `get_instructions()` method +3. **Registration**: Four skill management tools are registered with the agent +4. 
**Progressive Loading**: Agents can: + - List all available skills with `list_skills()` (optional, as skills are already in system prompt) + - Load detailed instructions with `load_skill(name)` + - Read additional resources with `read_skill_resource(skill_name, resource_name)` + - Execute scripts with `run_skill_script(skill_name, script_name, args)` + +## Progressive Disclosure + +The toolset implements **progressive disclosure** - exposing information only when needed: + +```markdown +┌─────────────────────────────────────────────────────────────┐ +│ System Prompt (automatically injected via toolset) │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ Available Skills: │ │ +│ │ - arxiv-search: Search arXiv for research papers │ │ +│ │ - web-research: Research topics on the web │ │ +│ │ - data-analyzer: Analyze CSV and JSON files │ │ +│ └───────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ + Agent sees skill names & descriptions + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ load_skill("arxiv-search") │ +│ ┌───────────────────────────────────────────────────────┐ │ +│ │ Returns full SKILL.md instructions: │ │ +│ │ - When to use │ │ +│ │ - Step-by-step guide │ │ +│ │ - Example invocations │ │ +│ │ - Available resources and scripts │ │ +│ └───────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ + Agent loads detailed instructions when needed +``` + +This approach: + +- **Reduces initial context size** - Only metadata is in the system prompt +- **Lets agents discover capabilities dynamically** - Load what's needed +- **Improves token efficiency** - Don't pay for unused instructions +- **Scales to many skills** - Add hundreds of skills without bloating prompts + +## Security Considerations + +!!! warning "Use Skills from Trusted Sources Only" + + Skills provide AI agents with new capabilities through instructions and code. While this makes them powerful, it also means a malicious skill can direct agents to invoke tools or execute code in ways that don't match the skill's stated purpose. + + If you must use a skill from an untrusted or unknown source, exercise extreme caution and thoroughly audit it before use. Depending on what access agents have when executing the skill, malicious skills could lead to data exfiltration, unauthorized system access, or other security risks. 
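Beyond vetting the skills themselves, the toolset's configuration can narrow the blast radius of script execution. A sketch of a conservative setup (the virtualenv path is an example; this limits the interpreter and runtime only, it is not a sandbox):

```python
from pydantic_ai.toolsets import SkillsToolset

toolset = SkillsToolset(
    directories=['./skills'],  # only directories you control
    script_timeout=10,  # tighter than the 30-second default
    python_executable='/opt/skills-venv/bin/python',  # dedicated interpreter for skill scripts
)
```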
+ +The toolset includes security measures: + +- **Path traversal prevention**: Resources and scripts are validated to stay within the skill directory using `_is_safe_path()` to prevent directory traversal attacks +- **Script timeout**: Scripts have a configurable timeout (default: 30 seconds) enforced via `anyio.move_on_after()` to prevent hung processes +- **Subprocess execution**: Scripts run in a separate process via `anyio.run_process()`, but with the same OS-level permissions as your agent process +- **Resource depth limit**: Resource reading is limited to a maximum depth of 3 levels within the skill directory (`max_depth=3`) to prevent excessive file system traversal + +## Next Steps + +- [Creating Skills](creating-skills.md) - Learn how to build your own skills +- [Using Skills](using-skills.md) - Learn how to integrate and use skills in your agents +- [API Reference](../api/skills.md) - Detailed type and API documentation + +## References + +This implementation is inspired by: + +- [DougTrajano/pydantic-ai-skills](https://github.com/DougTrajano/pydantic-ai-skills/) +- [vstorm-co/pydantic-deepagents](https://github.com/vstorm-co/pydantic-deepagents/) +- [langchain-ai/deepagents](https://github.com/langchain-ai/deepagents/) +- [Introducing Agent Skills | Anthropic](https://www.anthropic.com/news/agent-skills) +- [Using skills with Deep Agents | LangChain](https://blog.langchain.com/using-skills-with-deep-agents/) diff --git a/docs/skills/using-skills.md b/docs/skills/using-skills.md new file mode 100644 index 0000000000..2c649916fb --- /dev/null +++ b/docs/skills/using-skills.md @@ -0,0 +1,302 @@ +# Using Skills + +This guide covers how to integrate and use the Skills framework in your Pydantic AI agents. + +## SkillsToolset API + +The [`SkillsToolset`][pydantic_ai.toolsets.skills.SkillsToolset] is the main entry point for working with skills. + +### Initialization + +```python +from pydantic_ai.toolsets import SkillsToolset + +toolset = SkillsToolset( + directories=["./skills", "./shared-skills"], + auto_discover=True, # Auto-discover skills on init (default: True) + validate=True, # Validate skill structure (default: True) + id=None, # Unique identifier (default: None) + script_timeout=30, # Script execution timeout in seconds (default: 30) + python_executable=None, # Python executable path (default: sys.executable) + instruction_template=None, # Custom instruction template (default: None) +) +``` + +### Key Methods + +| Method | Description | +|--------|-------------| +| `get_skill(name)` | Get a specific skill object by name. Raises `SkillNotFoundError` if not found | +| `refresh()` | Re-scan directories for skills | + +### Properties + +| Property | Description | +|----------|-------------| +| `skills` | Dictionary of loaded skills (`dict[str, Skill]`) | + +### Customizing Instructions + +You can customize the instruction template that gets injected into the agent's system prompt: + +```python +custom_template = """# My Custom Skills Section + +Available tools: +{skills_list} + +Use load_skill(name) to get details. +""" + +toolset = SkillsToolset( + directories=["./skills"], + instruction_template=custom_template +) +``` + +The template must include the `{skills_list}` placeholder, which will be replaced with the formatted list of available skills. + +## The Four Tools + +The `SkillsToolset` provides four tools to agents: + +### 1. `list_skills()` + +Lists all available skills with their descriptions. 
+ +**Returns**: Formatted markdown with skill names and descriptions + +**When to use**: Optional - skills are already listed in the system prompt automatically. Use only if the agent needs to re-check available skills dynamically. + +**Example**: + +```python +# Agent can call this tool +list_skills() + +# Output: +# Available Skills: +# - arxiv-search: Search arXiv for research papers +# - web-research: Research topics on the web +# - data-analyzer: Analyze CSV and JSON files +``` + +### 2. `load_skill(skill_name)` + +Loads the complete instructions for a specific skill. + +**Parameters**: + +- `skill_name` (str) - Name of the skill to load + +**Returns**: Full SKILL.md content (as a string) including detailed instructions, available resources, and scripts + +**When to use**: When the agent needs detailed instructions for using a skill + +**Example**: + +```python +# Agent loads skill details +load_skill("arxiv-search") + +# Returns full SKILL.md content with: +# - When to use +# - Step-by-step instructions +# - Example invocations +# - Available resources and scripts +``` + +### 3. `read_skill_resource(skill_name, resource_name)` + +Reads additional resource files from a skill. + +**Parameters**: + +- `skill_name` (str) - Name of the skill +- `resource_name` (str) - Resource filename (e.g., "FORMS.md") + +**Returns**: Content of the resource file + +**When to use**: When a skill references additional documentation or data files + +**Example**: + +```python +# Agent reads a skill resource +read_skill_resource("web-research", "FORMS.md") + +# Returns content of the FORMS.md file +``` + +### 4. `run_skill_script(skill_name, script_name, args)` + +Executes a Python script from a skill. + +**Parameters**: + +- `skill_name` (str) - Name of the skill +- `script_name` (str) - Script name without .py extension +- `args` (list[str] | None, optional) - Command-line arguments passed to the script + +**Returns**: Script output (stdout and stderr combined) + +**When to use**: When a skill needs to execute custom code + +**Example**: + +```python +# Agent executes a script +run_skill_script( + skill_name="arxiv-search", + script_name="arxiv_search", + args=["machine learning", "--max-papers", "3"] +) + +# Returns script output with search results +``` + +## Skill Discovery + +Skills can be discovered programmatically using the [`discover_skills`][pydantic_ai.toolsets.skills.discover_skills] function: + +```python +from pydantic_ai.toolsets import discover_skills + +skills = discover_skills( + directories=["./skills"], + validate=True +) + +for skill in skills: + print(f"{skill.name}: {skill.metadata.description}") + print(f" Resources: {[r.name for r in skill.resources]}") + print(f" Scripts: {[s.name for s in skill.scripts]}") +``` + +This is useful for: + +- Listing available skills before creating an agent +- Validating skill structure in tests +- Building custom skill management tools +- Generating documentation about available skills + +## Usage Patterns + +### Basic Usage + +```python +from pydantic_ai import Agent +from pydantic_ai.toolsets import SkillsToolset + +# Create toolset with skills +skills_toolset = SkillsToolset(directories=["./skills"]) + +# Create agent with skills +agent = Agent( + model='openai:gpt-4o', + instructions="You are a helpful assistant.", + toolsets=[skills_toolset] +) + +# Agent automatically has access to all skill tools +result = await agent.run("Search for papers about transformers") +``` + +### Multiple Skill Directories + +```python +# Load skills from multiple 
directories +skills_toolset = SkillsToolset( + directories=[ + "./my-skills", # Project-specific skills + "./shared-skills", # Shared across projects + "~/.pydantic-ai/skills" # Global skills + ] +) +``` + +### Custom Script Timeout + +```python +# Increase timeout for long-running scripts +skills_toolset = SkillsToolset( + directories=["./skills"], + script_timeout=120 # 2 minutes +) +``` + +### Programmatic Access + +```python +# Access skills programmatically +toolset = SkillsToolset(directories=["./skills"]) + +# Get a specific skill +skill = toolset.get_skill("arxiv-search") +print(f"Skill: {skill.name}") +print(f"Description: {skill.metadata.description}") +print(f"Scripts: {[s.name for s in skill.scripts]}") + +# Refresh skills (rescans directories) +toolset.refresh() +``` + +### Custom Instructions Template + +```python +# Customize how skills appear in system prompt +template = """ +## Available Research Tools + +The following specialized tools are available: +{skills_list} + +To use a tool, first load its instructions with load_skill(name). +""" + +toolset = SkillsToolset( + directories=["./skills"], + instruction_template=template +) +``` + +## Error Handling + +The toolset raises specific exceptions for different error conditions: + +```python +from pydantic_ai.toolsets.skills import ( + SkillNotFoundError, + SkillValidationError, + SkillResourceLoadError, + SkillScriptExecutionError, +) + +try: + toolset = SkillsToolset(directories=["./skills"]) + skill = toolset.get_skill("non-existent") +except SkillNotFoundError as e: + print(f"Skill not found: {e}") +except SkillValidationError as e: + print(f"Invalid skill structure: {e}") +``` + +## Best Practices + +### Organization + +- **Organize by domain**: Group related skills in subdirectories +- **Use descriptive directories**: `./skills/research/`, `./skills/data-analysis/` + +### Testing + +- **Test skills independently**: Run scripts directly before adding to skills +- **Validate structure**: Use `validate=True` during development +- **Use programmatic discovery**: Test skill loading in your test suite + +### Security + +We strongly recommend using Skills only from trusted sources: those you created yourself or obtained from trusted sources. Skills provide AI Agents with new capabilities through instructions and code, and while this makes them powerful, it also means a malicious Skill can direct agents to invoke tools or execute code in ways that don't match the Skill's stated purpose. + +!!! warning + If you must use a Skill from an untrusted or unknown source, exercise extreme caution and thoroughly audit it before use. Depending on the access agents have when executing the Skill, malicious Skills could lead to data exfiltration, unauthorized system access, or other security risks. diff --git a/mkdocs.yml b/mkdocs.yml index 6c98ce6a4d..90f788fcf9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,7 +41,6 @@ nav: - tools.md - tools-advanced.md - toolsets.md - - skills.md - deferred-tools.md - builtin-tools.md - common-tools.md @@ -56,6 +55,10 @@ nav: - mcp/fastmcp-client.md - mcp/server.md - Multi-Agent Patterns: multi-agent-applications.md + - Skills: + - Overview: skills/overview.md + - Creating Skills: skills/creating-skills.md + - Using Skills: skills/using-skills.md - Testing: testing.md - Web Chat UI: web.md @@ -135,6 +138,7 @@ nav: - api/toolsets.md - api/builtin_tools.md - api/common_tools.md + - api/skills.md - api/durable_exec.md - api/output.md - api/result.md