diff --git a/docs/docs/api/optimizers/GEPA/GEPA_Advanced.md b/docs/docs/api/optimizers/GEPA/GEPA_Advanced.md new file mode 100644 index 0000000000..624e580ad1 --- /dev/null +++ b/docs/docs/api/optimizers/GEPA/GEPA_Advanced.md @@ -0,0 +1,445 @@ +# dspy.GEPA - Advanced Features + +## Custom Instruction Proposers + +### What is instruction_proposer? + +The `instruction_proposer` is the component responsible for invoking the `reflection_lm` and proposing new prompts during GEPA optimization. When GEPA identifies underperforming components in your DSPy program, the instruction proposer analyzes execution traces, feedback, and failures to generate improved instructions tailored to the observed issues. + +### Default Implementation + +By default, GEPA uses the built-in instruction proposer from the [GEPA library](https://github.com/gepa-ai/gepa), which implements the [`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). The [default proposer](https://github.com/gepa-ai/gepa/blob/main/src/gepa/proposer/reflective_mutation/reflective_mutation.py#L53-L75) uses this prompt template: + +```` +I provided an assistant with the following instructions to perform a task for me: +``` +<curr_instructions> +``` + +The following are examples of different task inputs provided to the assistant along with the assistant's response for each of them, and some feedback on how the assistant's response could be better: +``` +<inputs_outputs_feedback> +``` + +Your task is to write a new instruction for the assistant. + +Read the inputs carefully and identify the input format and infer detailed task description about the task I wish to solve with the assistant. + +Read all the assistant responses and the corresponding feedback. Identify all niche and domain specific factual information about the task and include it in the instruction, as a lot of it may not be available to the assistant in the future. 
The assistant may have utilized a generalizable strategy to solve the task, if so, include that in the instruction as well. + +Provide the new instructions within ``` blocks. +```` + +This template is automatically filled with: + +- `<curr_instructions>`: The current instruction being optimized +- `<inputs_outputs_feedback>`: Structured markdown containing predictor inputs, generated outputs, and evaluation feedback + +Example of default behavior: + +```python +# Default instruction proposer is used automatically +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=api_key), + auto="medium" +) +optimized_program = gepa.compile(student, trainset=examples) +``` + +### When to Use Custom instruction_proposer + +**Note:** Custom instruction proposers are an advanced feature. Most users should start with the default proposer, which works well for most text-based optimization tasks. + +Consider implementing a custom instruction proposer when you need: + +- **Multi-modal handling**: Process images (dspy.Image) alongside textual information in your inputs +- **Nuanced control on limits and length constraints**: Have more fine-grained control over instruction length, format, and structural requirements +- **Domain-specific information**: Inject specialized knowledge, terminology, or context that the default proposer lacks and cannot be provided via feedback_func. This is an advanced feature, and most users should not need to use this. +- **Provider-specific prompting guides**: Optimize instructions for specific LLM providers (OpenAI, Anthropic, etc.) 
with their unique formatting preferences +- **Coupled component updates**: Handle situations where 2 or more components need to be updated together in a coordinated manner, rather than optimizing each component independently (refer to component_selector parameter, in [Custom Component Selection](#custom-component-selection) section, for related functionality) +- **External knowledge integration**: Connect to databases, APIs, or knowledge bases during instruction generation + +### Available Options + +**Built-in Options:** + +- **Default Proposer**: The standard GEPA instruction proposer (used when `instruction_proposer=None`). The default instruction proposer IS an instruction proposer as well! It is the most general one, that was used for the diverse experiments reported in the GEPA paper and tutorials. +- **MultiModalInstructionProposer**: Handles `dspy.Image` inputs and structured multimodal content. + +```python +from dspy.teleprompt.gepa.instruction_proposal import MultiModalInstructionProposer + +# For tasks involving images or multimodal inputs +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=api_key), + instruction_proposer=MultiModalInstructionProposer(), + auto="medium" +) +``` + +We invite community contributions of new instruction proposers for specialized domains as the [GEPA library](https://github.com/gepa-ai/gepa) continues to grow. + +### How to Implement Custom Instruction Proposers + +Custom instruction proposers must implement the `ProposalFn` protocol by defining a callable class or function. 
GEPA will call your proposer during optimization: + +```python +from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample + +class CustomInstructionProposer: + def __call__( + self, + candidate: dict[str, str], # Candidate component name -> instruction mapping to be updated in this round + reflective_dataset: dict[str, list[ReflectiveExample]], # Component -> examples with structure: {"Inputs": ..., "Generated Outputs": ..., "Feedback": ...} + components_to_update: list[str] # Which components to improve + ) -> dict[str, str]: # Return new instruction mapping only for components being updated + # Your custom instruction generation logic here + return updated_instructions + +# Or as a function: +def custom_instruction_proposer(candidate, reflective_dataset, components_to_update): + # Your custom instruction generation logic here + return updated_instructions +``` + +**Reflective Dataset Structure:** + +- `dict[str, list[ReflectiveExample]]` - Maps component names to lists of examples +- `ReflectiveExample` TypedDict contains: + - `Inputs: dict[str, Any]` - Predictor inputs (may include dspy.Image objects) + - `Generated_Outputs: dict[str, Any] | str` - Success: output fields dict, Failure: error message. Note: the example dictionaries use the key `"Generated Outputs"` (with a space), as shown in the structure comment above, so index with that exact string + - `Feedback: str` - Always a string from metric function or auto-generated by GEPA + +#### Basic Example: Word Limit Proposer + +```python +import dspy +from gepa.core.adapter import ProposalFn +from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample + +class GenerateWordLimitedInstruction(dspy.Signature): + """Given a current instruction and feedback examples, generate an improved instruction with word limit constraints.""" + + current_instruction = dspy.InputField(desc="The current instruction that needs improvement") + feedback_summary = dspy.InputField(desc="Feedback from examples that might include both positive and negative cases") + max_words = dspy.InputField(desc="Maximum number of words allowed in the new instruction") + + improved_instruction = 
dspy.OutputField(desc="A new instruction that fixes the issues while staying under the max_words limit") + +class WordLimitProposer(ProposalFn): + def __init__(self, max_words: int = 1000): + self.max_words = max_words + self.instruction_improver = dspy.ChainOfThought(GenerateWordLimitedInstruction) + + def __call__(self, candidate: dict[str, str], reflective_dataset: dict[str, list[ReflectiveExample]], components_to_update: list[str]) -> dict[str, str]: + updated_components = {} + + for component_name in components_to_update: + if component_name not in candidate or component_name not in reflective_dataset: + continue + + current_instruction = candidate[component_name] + component_examples = reflective_dataset[component_name] + + # Create feedback summary + feedback_text = "\n".join([ + f"Example {i+1}: {ex.get('Feedback', 'No feedback')}" + for i, ex in enumerate(component_examples) # Uses every example; slice component_examples (e.g. [:10]) if you need to cap context size + ]) + + # Use the module to improve the instruction + result = self.instruction_improver( + current_instruction=current_instruction, + feedback_summary=feedback_text, + max_words=self.max_words + ) + + updated_components[component_name] = result.improved_instruction + + return updated_components + +# Usage +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=api_key), + instruction_proposer=WordLimitProposer(max_words=700), + auto="medium" +) +``` + +#### Advanced Example: RAG-Enhanced Instruction Proposer + +```python +import dspy +from gepa.core.adapter import ProposalFn +from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample + +class GenerateDocumentationQuery(dspy.Signature): + """Analyze examples with feedback to identify common issue patterns and generate targeted database queries for retrieving relevant documentation. + + Your goal is to search a document database for guidelines that address the problematic patterns found in the examples. 
Look for recurring issues, error types, or failure modes in the feedback, then craft specific search queries that will find documentation to help resolve these patterns.""" + + current_instruction = dspy.InputField(desc="The current instruction that needs improvement") + examples_with_feedback = dspy.InputField(desc="Examples with their feedback showing what issues occurred and any recurring patterns") + + failure_patterns: str = dspy.OutputField(desc="Summarize the common failure patterns identified in the examples") + + retrieval_queries: list[str] = dspy.OutputField(desc="Specific search queries to find relevant documentation in the database that addresses the common issue patterns identified in the problematic examples") + +class GenerateRAGEnhancedInstruction(dspy.Signature): + """Generate improved instructions using retrieved documentation and examples analysis.""" + + current_instruction = dspy.InputField(desc="The current instruction that needs improvement") + relevant_documentation = dspy.InputField(desc="Retrieved guidelines and best practices from specialized documentation") + examples_with_feedback = dspy.InputField(desc="Examples showing what issues occurred with the current instruction") + + improved_instruction: str = dspy.OutputField(desc="Enhanced instruction that incorporates retrieved guidelines and addresses the issues shown in the examples") + +class RAGInstructionImprover(dspy.Module): + """Module that uses RAG to improve instructions with specialized documentation.""" + + def __init__(self, retrieval_model): + super().__init__() + self.retrieve = retrieval_model # Could be dspy.Retrieve or custom retriever + self.query_generator = dspy.ChainOfThought(GenerateDocumentationQuery) + self.generate_answer = dspy.ChainOfThought(GenerateRAGEnhancedInstruction) + + def forward(self, current_instruction: str, component_examples: list): + """Improve instruction using retrieved documentation.""" + + # Let LM analyze examples and generate targeted 
retrieval queries + query_result = self.query_generator( + current_instruction=current_instruction, + examples_with_feedback=component_examples + ) + + results = self.retrieve.query( + query_texts=query_result.retrieval_queries, + n_results=3 + ) + + relevant_docs_parts = [] + for i, (query, query_docs) in enumerate(zip(query_result.retrieval_queries, results['documents'])): + if query_docs: + docs_formatted = "\n".join([f" - {doc}" for doc in query_docs]) + relevant_docs_parts.append( + f"**Search Query #{i+1}**: {query}\n" + f"**Retrieved Guidelines**:\n{docs_formatted}" + ) + + relevant_docs = "\n\n" + "="*60 + "\n\n".join(relevant_docs_parts) + "\n" + "="*60 + + # Generate improved instruction with retrieved context + result = self.generate_answer( + current_instruction=current_instruction, + relevant_documentation=relevant_docs, + examples_with_feedback=component_examples + ) + + return result + +class DocumentationEnhancedProposer(ProposalFn): + """Instruction proposer that accesses specialized documentation via RAG.""" + + def __init__(self, documentation_retriever): + """ + Args: + documentation_retriever: A retrieval model that can search your specialized docs + Could be dspy.Retrieve, ChromadbRM, or custom retriever + """ + self.instruction_improver = RAGInstructionImprover(documentation_retriever) + + def __call__(self, candidate: dict[str, str], reflective_dataset: dict[str, list[ReflectiveExample]], components_to_update: list[str]) -> dict[str, str]: + updated_components = {} + + for component_name in components_to_update: + if component_name not in candidate or component_name not in reflective_dataset: + continue + + current_instruction = candidate[component_name] + component_examples = reflective_dataset[component_name] + + result = self.instruction_improver( + current_instruction=current_instruction, + component_examples=component_examples + ) + + updated_components[component_name] = result.improved_instruction + + return updated_components + 
+import chromadb + +client = chromadb.Client() +collection = client.get_collection("instruction_guidelines") + +gepa = dspy.GEPA( + metric=task_specific_metric, + reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=api_key), + instruction_proposer=DocumentationEnhancedProposer(collection), + auto="medium" +) +``` + +#### Integration Patterns + +**Using Custom Proposer with External LM:** + +```python +class ExternalLMProposer(ProposalFn): + def __init__(self): + # Manage your own LM instance + self.external_lm = dspy.LM('gemini/gemini-2.5-pro') + + def __call__(self, candidate, reflective_dataset, components_to_update): + updated_components = {} + + with dspy.context(lm=self.external_lm): + # Your custom logic here using self.external_lm + for component_name in components_to_update: + # ... implementation + pass + + return updated_components + +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=None, # Optional when using custom proposer + instruction_proposer=ExternalLMProposer(), + auto="medium" +) +``` + +**Best Practices:** + +- **Use the full power of DSPy**: Leverage DSPy components like `dspy.Module`, `dspy.Signature`, and `dspy.Predict` to create your instruction proposer rather than direct LM calls. Consider `dspy.Refine` for constraint satisfaction, `dspy.ChainOfThought` for complex reasoning tasks, and compose multiple modules for sophisticated instruction improvement workflows +- **Enable holistic feedback analysis**: While dspy.GEPA's `GEPAFeedbackMetric` processes one (gold, prediction) pair at a time, instruction proposers receive all examples for a component in batch, enabling cross-example pattern detection and systematic issue identification. 
+- **Mind data serialization**: Serializing everything to strings might not be ideal - handle complex input types (like `dspy.Image`) by maintaining their structure for better LM processing +- **Test thoroughly**: Test your custom proposer with representative failure cases + +## Custom Component Selection + +### What is component_selector? + +The `component_selector` parameter controls which components (predictors) in your DSPy program are selected for optimization at each GEPA iteration. Instead of the default round-robin approach that updates one component at a time, you can implement custom selection strategies that choose single or multiple components based on optimization state, performance trajectories, and other contextual information. + +### Default Behavior + +By default, GEPA uses a **round-robin strategy** (`RoundRobinReflectionComponentSelector`) that cycles through components sequentially, optimizing one component per iteration: + +```python +# Default round-robin component selection +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000, api_key=api_key), + # component_selector="round_robin" # This is the default + auto="medium" +) +``` + +### Built-in Selection Strategies + +**String-based selectors:** + +- `"round_robin"` (default): Cycles through components one at a time +- `"all"`: Selects all components for simultaneous optimization + +```python +# Optimize all components simultaneously +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=reflection_lm, + component_selector="all", # Update all components together + auto="medium" +) + +# Explicit round-robin selection +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=reflection_lm, + component_selector="round_robin", # One component per iteration + auto="medium" +) +``` + +### When to Use Custom Component Selection + +Consider implementing custom component selection when you need: + +- **Dependency-aware optimization**: Update related 
components together (e.g., a classifier and its input formatter) +- **LLM-driven selection**: Let an LLM analyze trajectories and decide which components need attention +- **Resource-conscious optimization**: Balance optimization thoroughness with computational budget + +### Custom Component Selector Protocol + +Custom component selectors must implement the [`ReflectionComponentSelector`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/proposer/reflective_mutation/base.py) protocol by defining a callable class or function. GEPA will call your selector during optimization: + +```python +from dspy.teleprompt.gepa.gepa_utils import GEPAState, Trajectory + +class CustomComponentSelector: + def __call__( + self, + state: GEPAState, # Complete optimization state with history + trajectories: list[Trajectory], # Execution traces from the current minibatch + subsample_scores: list[float], # Scores for each example in the current minibatch + candidate_idx: int, # Index of the current program candidate being optimized + candidate: dict[str, str], # Component name -> instruction mapping + ) -> list[str]: # Return list of component names to optimize + # Your custom component selection logic here + return selected_components + +# Or as a function: +def custom_component_selector(state, trajectories, subsample_scores, candidate_idx, candidate): + # Your custom component selection logic here + return selected_components +``` + +### Custom Implementation Example + +Here's a simple function that alternates between optimizing different halves of your components: + +```python +def alternating_half_selector(state, trajectories, subsample_scores, candidate_idx, candidate): + """Optimize half the components on even iterations, half on odd iterations.""" + components = list(candidate.keys()) + + # If there's only one component, always optimize it + if len(components) <= 1: + return components + + mid_point = len(components) // 2 + + # Use state.i (iteration counter) to alternate between 
halves + if state.i % 2 == 0: + # Even iteration: optimize first half + return components[:mid_point] + else: + # Odd iteration: optimize second half + return components[mid_point:] + +# Usage +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=reflection_lm, + component_selector=alternating_half_selector, + auto="medium" +) +``` + +### Integration with Custom Instruction Proposers + +Component selectors work seamlessly with custom instruction proposers. The selector determines which components to update, then the instruction proposer generates new instructions for those components: + +```python +# Combined custom selector + custom proposer +gepa = dspy.GEPA( + metric=my_metric, + reflection_lm=reflection_lm, + component_selector=alternating_half_selector, + instruction_proposer=WordLimitProposer(max_words=500), + auto="medium" +) +``` diff --git a/docs/docs/api/optimizers/GEPA.md b/docs/docs/api/optimizers/GEPA/overview.md similarity index 95% rename from docs/docs/api/optimizers/GEPA.md rename to docs/docs/api/optimizers/GEPA/overview.md index 9b7484fc5d..0125702bea 100644 --- a/docs/docs/api/optimizers/GEPA.md +++ b/docs/docs/api/optimizers/GEPA/overview.md @@ -57,7 +57,7 @@ When `track_stats=True`, GEPA returns detailed results about all of the proposed ## Usage Examples -See GEPA usage tutorials in [GEPA Tutorials](../../tutorials/gepa_ai_program/index.md). +See GEPA usage tutorials in [GEPA Tutorials](../../../tutorials/gepa_ai_program/index.md). ### Inference-Time Search @@ -117,8 +117,12 @@ Practical Recipe for GEPA-Friendly Feedback: - **Multi-Objective Tasks** (e.g., PUPA): Decompose aggregate scores to reveal contributions from each objective, highlighting tradeoffs (e.g., quality vs. privacy). - **Stacked Pipelines** (e.g., code generation: parse → compile → run → profile → evaluate): Expose stage-specific failures; natural-language traces often suffice for LLM self-correction. 
+## Custom Instruction Proposal + +For advanced customization of GEPA's instruction proposal mechanism, including custom instruction proposers and component selectors, see [Advanced Features](GEPA_Advanced.md). + ## Further Reading - [GEPA Paper: arxiv:2507.19457](https://arxiv.org/abs/2507.19457) - [GEPA Github](https://github.com/gepa-ai/gepa) - This repository provides the core GEPA evolution pipeline used by `dspy.GEPA` optimizer. -- [DSPy Tutorials](../../tutorials/gepa_ai_program/index.md) +- [DSPy Tutorials](../../../tutorials/gepa_ai_program/index.md) diff --git a/docs/docs/learn/optimization/optimizers.md b/docs/docs/learn/optimization/optimizers.md index 195f9fd42e..a684f6f5b4 100644 --- a/docs/docs/learn/optimization/optimizers.md +++ b/docs/docs/learn/optimization/optimizers.md @@ -60,7 +60,7 @@ These optimizers produce optimal instructions for the prompt and, in the case of 7. [**`SIMBA`**](../../api/optimizers/SIMBA.md) -8. [**`GEPA`**](../../api/optimizers/GEPA.md): Uses LM's to reflect on the DSPy program's trajectory, to identify what worked, what didn't and propose prompts addressing the gaps. Additionally, GEPA can leverage domain-specific textual feedback to rapidly improve the DSPy program. Detailed tutorials on using GEPA are available at [dspy.GEPA Tutorials](../../tutorials/gepa_ai_program/index.md). +8. [**`GEPA`**](../../api/optimizers/GEPA/overview.md): Uses LM's to reflect on the DSPy program's trajectory, to identify what worked, what didn't and propose prompts addressing the gaps. Additionally, GEPA can leverage domain-specific textual feedback to rapidly improve the DSPy program. Detailed tutorials on using GEPA are available at [dspy.GEPA Tutorials](../../tutorials/gepa_ai_program/index.md). 
### Automatic Finetuning diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index b39b1e0c7b..d383d7573c 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -123,7 +123,9 @@ nav: - BootstrapRS: api/optimizers/BootstrapRS.md - COPRO: api/optimizers/COPRO.md - Ensemble: api/optimizers/Ensemble.md - - GEPA: api/optimizers/GEPA.md + - GEPA: + - Overview: api/optimizers/GEPA/overview.md + - Advanced Features: api/optimizers/GEPA/GEPA_Advanced.md - InferRules: api/optimizers/InferRules.md - KNN: api/optimizers/KNN.md - KNNFewShot: api/optimizers/KNNFewShot.md @@ -243,6 +245,7 @@ plugins: "deep-dive/optimizers/Ensemble.md": "api/optimizers/Ensemble.md" "deep-dive/optimizers/LabeledFewShot.md": "api/optimizers/LabeledFewShot.md" "deep-dive/optimizers/miprov2.md": "api/optimizers/MIPROv2.md" + "api/optimizers/GEPA/index.md": "api/optimizers/GEPA/overview.md" "docs/quick-start/getting-started-01.md": "tutorials/rag/index.ipynb" "docs/quick-start/getting-started-02.md": "tutorials/rag/index.ipynb" diff --git a/dspy/teleprompt/gepa/gepa.py b/dspy/teleprompt/gepa/gepa.py index c2ab1b68f7..8da91fc692 100644 --- a/dspy/teleprompt/gepa/gepa.py +++ b/dspy/teleprompt/gepa/gepa.py @@ -214,14 +214,31 @@ def metric( a strong reflection model. Consider using `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)` for optimal performance. skip_perfect_score: Whether to skip examples with perfect scores during reflection. Default is True. - instruction_proposer: Optional custom instruction proposer implementing GEPA's ProposalFn protocol. - If provided, GEPA will use this custom proposer instead of its default instruction proposal - mechanism to generate improved instructions based on feedback from failed examples. This is - particularly useful when you need specialized instruction generation for multimodal inputs - (like dspy.Image) or custom types. Use `MultiModalInstructionProposer()` from - `dspy.teleprompt.gepa.instruction_proposal` for handling visual content. 
If None (default), - GEPA uses its built-in text-optimized proposer (see `gepa.strategies.instruction_proposal.InstructionProposalSignature` - for reference implementation). + instruction_proposer: Optional custom instruction proposer implementing GEPA's ProposalFn protocol. + **Default: None (recommended for most users)** - Uses GEPA's proven instruction proposer from + the [GEPA library](https://github.com/gepa-ai/gepa), which implements the + [`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). This default + proposer is highly capable and was validated across diverse experiments reported in the GEPA + paper and tutorials. + + See documentation on custom instruction proposers + [here](https://dspy.ai/api/optimizers/GEPA/GEPA_Advanced/#custom-instruction-proposers). + + **Advanced Feature**: Only needed for specialized scenarios: + - **Multi-modal handling**: Processing dspy.Image inputs alongside textual information + - **Nuanced control over constraints**: Fine-grained control over instruction length, format, + and structural requirements beyond standard feedback mechanisms + - **Domain-specific knowledge injection**: Specialized terminology or context that cannot be + provided through feedback_func alone + - **Provider-specific prompting**: Optimizations for specific LLM providers (OpenAI, Anthropic) + with unique formatting preferences + - **Coupled component updates**: Coordinated updates of multiple components together rather + than independent optimization + - **External knowledge integration**: Runtime access to databases, APIs, or knowledge bases + + The default proposer handles the vast majority of use cases effectively. Use + MultiModalInstructionProposer() from dspy.teleprompt.gepa.instruction_proposal for visual + content or implement custom ProposalFn for highly specialized requirements. Note: When both instruction_proposer and reflection_lm are set, the instruction_proposer is called in the reflection_lm context. 
However, reflection_lm is optional when using a custom instruction_proposer. diff --git a/dspy/teleprompt/gepa/gepa_utils.py b/dspy/teleprompt/gepa/gepa_utils.py index 50f8b6ea7a..ddca861656 100644 --- a/dspy/teleprompt/gepa/gepa_utils.py +++ b/dspy/teleprompt/gepa/gepa_utils.py @@ -1,6 +1,6 @@ import logging import random -from typing import Any, Callable, Protocol +from typing import Any, Callable, Protocol, TypedDict from gepa import EvaluationBatch, GEPAAdapter from gepa.core.adapter import ProposalFn @@ -24,6 +24,18 @@ def log(self, x: str): DSPyTrace = list[tuple[Any, dict[str, Any], Prediction]] + +class ReflectiveExample(TypedDict): + """ + Structure of individual examples in the reflective dataset. + + Each example contains the predictor inputs, generated outputs, and feedback from evaluation. + """ + Inputs: dict[str, Any] # Predictor inputs (may include str, dspy.Image, etc.) + Generated_Outputs: dict[str, Any] | str # Success: dict with output fields, Failure: error message string + Feedback: str # Always a string - from metric function or parsing error message + + class ScoreWithFeedback(Prediction): score: float feedback: str @@ -158,11 +170,11 @@ def evaluate(self, batch, candidate, capture_traces=False): scores = [s["score"] if hasattr(s, "score") else s for s in scores] return EvaluationBatch(outputs=outputs, scores=scores, trajectories=None) - def make_reflective_dataset(self, candidate, eval_batch, components_to_update): + def make_reflective_dataset(self, candidate, eval_batch, components_to_update) -> dict[str, list[ReflectiveExample]]: from dspy.teleprompt.bootstrap_trace import FailedPrediction program = self.build_program(candidate) - ret_d: dict[str, list[dict[str, Any]]] = {} + ret_d: dict[str, list[ReflectiveExample]] = {} for pred_name in components_to_update: module = None for name, m in program.named_predictors(): @@ -171,7 +183,7 @@ def make_reflective_dataset(self, candidate, eval_batch, components_to_update): break assert module is 
not None - items: list[dict[str, Any]] = [] + items: list[ReflectiveExample] = [] for data in eval_batch.trajectories or []: trace = data["trace"] example = data["example"] diff --git a/dspy/teleprompt/gepa/instruction_proposal.py b/dspy/teleprompt/gepa/instruction_proposal.py index 23b7ab1802..23810b9a02 100644 --- a/dspy/teleprompt/gepa/instruction_proposal.py +++ b/dspy/teleprompt/gepa/instruction_proposal.py @@ -4,6 +4,7 @@ import dspy from dspy.adapters.types.base_type import Type +from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample class GenerateEnhancedMultimodalInstructionFromFeedback(dspy.Signature): @@ -54,7 +55,7 @@ def __init__(self): super().__init__() self.propose_instruction = dspy.Predict(GenerateEnhancedMultimodalInstructionFromFeedback) - def forward(self, current_instruction: str, reflective_dataset: list[dict[str, Any]]) -> str: + def forward(self, current_instruction: str, reflective_dataset: list[ReflectiveExample]) -> str: """ Generate an improved instruction based on current instruction and feedback examples. @@ -84,7 +85,7 @@ def forward(self, current_instruction: str, reflective_dataset: list[dict[str, A return result.improved_instruction def _format_examples_with_pattern_analysis( - self, reflective_dataset: list[dict[str, Any]] + self, reflective_dataset: list[ReflectiveExample] ) -> tuple[str, dict[int, list[Type]]]: """ Format examples with pattern analysis and feedback categorization. @@ -106,7 +107,7 @@ def _format_examples_with_pattern_analysis( return formatted_examples, image_map - def _analyze_feedback_patterns(self, reflective_dataset: list[dict[str, Any]]) -> dict[str, Any]: + def _analyze_feedback_patterns(self, reflective_dataset: list[ReflectiveExample]) -> dict[str, Any]: """ Analyze feedback patterns to provide better context for instruction generation. 
@@ -177,7 +178,7 @@ def _create_pattern_summary(self, feedback_analysis: dict[str, Any]) -> str: return "\n".join(summary_parts) def _format_examples_for_instruction_generation( - self, reflective_dataset: list[dict[str, Any]] + self, reflective_dataset: list[ReflectiveExample] ) -> tuple[str, dict[int, list[Type]]]: """ Format examples using GEPA's markdown structure while preserving image objects. @@ -278,7 +279,7 @@ def __init__(self): def __call__( self, candidate: dict[str, str], - reflective_dataset: dict[str, list[dict[str, Any]]], + reflective_dataset: dict[str, list[ReflectiveExample]], components_to_update: list[str], ) -> dict[str, str]: """GEPA-compatible proposal function. diff --git a/tests/teleprompt/test_gepa.py b/tests/teleprompt/test_gepa.py index b78e808947..9580120a80 100644 --- a/tests/teleprompt/test_gepa.py +++ b/tests/teleprompt/test_gepa.py @@ -414,3 +414,65 @@ def random_component_selector(state, trajectories, subsample_scores, candidate_i result = optimizer.compile(student, trainset=trainset, valset=trainset) assert result is not None, "Should work with custom random function selector" + + +def test_alternating_half_component_selector(): + """Test alternating half selector that optimizes different halves on even/odd iterations.""" + + selection_history = [] + + def alternating_half_selector(state, trajectories, subsample_scores, candidate_idx, candidate): + """Optimize half the components on even iterations, half on odd iterations.""" + components = list(candidate.keys()) + + # If there's only one component, always optimize it + if len(components) <= 1: + selected = components + else: + mid_point = len(components) // 2 + + # Use state.i (iteration counter) to alternate between halves + if state.i % 2 == 0: + # Even iteration: optimize first half + selected = components[:mid_point] + else: + # Odd iteration: optimize second half + selected = components[mid_point:] + + # Track selections for verification + selection_history.append({ + 
"iteration": state.i, + "selected": selected.copy(), + "all_components": components.copy() + }) + + return selected + + student = MultiComponentModule() # Has "classifier" and "generator" components + + # Provide enough responses for multiple iterations + task_lm = DummyLM([{"category": "test_category", "output": "test_output"}] * 20) + reflection_lm = DummyLM([{"improved_instruction": "Better instruction"}] * 10) + trainset = [dspy.Example(input="test", output="expected").with_inputs("input")] + + with dspy.context(lm=task_lm): + optimizer = dspy.GEPA( + metric=component_selection_metric, + reflection_lm=reflection_lm, + max_metric_calls=8, # Allow multiple iterations + component_selector=alternating_half_selector, + ) + result = optimizer.compile(student, trainset=trainset, valset=trainset) + + assert result is not None, "Should work with alternating half selector" + assert len(selection_history) >= 2, "Should have made multiple selections" + + for i, selection in enumerate(selection_history): + if selection["iteration"] % 2 == 0: + # Even iteration should select first half: ["classifier"] + assert "classifier" in selection["selected"], f"Even iteration {selection['iteration']} should include classifier" + assert "generator" not in selection["selected"], f"Even iteration {selection['iteration']} should not include generator" + else: + # Odd iteration should select second half: ["generator"] + assert "generator" in selection["selected"], f"Odd iteration {selection['iteration']} should include generator" + assert "classifier" not in selection["selected"], f"Odd iteration {selection['iteration']} should not include classifier"