445 changes: 445 additions & 0 deletions docs/docs/api/optimizers/GEPA/GEPA_Advanced.md

Large diffs are not rendered by default.

@@ -57,7 +57,7 @@ When `track_stats=True`, GEPA returns detailed results about all of the proposed

## Usage Examples

See GEPA usage tutorials in [GEPA Tutorials](../../tutorials/gepa_ai_program/index.md).
See GEPA usage tutorials in [GEPA Tutorials](../../../tutorials/gepa_ai_program/index.md).

### Inference-Time Search

@@ -121,4 +121,4 @@ Practical Recipe for GEPA-Friendly Feedback:

- [GEPA Paper: arxiv:2507.19457](https://arxiv.org/abs/2507.19457)
- [GEPA Github](https://github.com/gepa-ai/gepa) - This repository provides the core GEPA evolution pipeline used by `dspy.GEPA` optimizer.
- [DSPy Tutorials](../../tutorials/gepa_ai_program/index.md)
- [DSPy Tutorials](../../../tutorials/gepa_ai_program/index.md)
2 changes: 1 addition & 1 deletion docs/docs/learn/optimization/optimizers.md
@@ -60,7 +60,7 @@ These optimizers produce optimal instructions for the prompt and, in the case of

7. [**`SIMBA`**](../../api/optimizers/SIMBA.md)

8. [**`GEPA`**](../../api/optimizers/GEPA.md): Uses LM's to reflect on the DSPy program's trajectory, to identify what worked, what didn't and propose prompts addressing the gaps. Additionally, GEPA can leverage domain-specific textual feedback to rapidly improve the DSPy program. Detailed tutorials on using GEPA are available at [dspy.GEPA Tutorials](../../tutorials/gepa_ai_program/index.md).
8. [**`GEPA`**](../../api/optimizers/GEPA/overview.md): Uses LMs to reflect on the DSPy program's trajectory, identify what worked and what didn't, and propose prompts that address the gaps. Additionally, GEPA can leverage domain-specific textual feedback to rapidly improve the DSPy program. Detailed tutorials on using GEPA are available at [dspy.GEPA Tutorials](../../tutorials/gepa_ai_program/index.md).

### Automatic Finetuning

5 changes: 4 additions & 1 deletion docs/mkdocs.yml
@@ -123,7 +123,9 @@ nav:
- BootstrapRS: api/optimizers/BootstrapRS.md
- COPRO: api/optimizers/COPRO.md
- Ensemble: api/optimizers/Ensemble.md
- GEPA: api/optimizers/GEPA.md
- GEPA:
- Overview: api/optimizers/GEPA/overview.md
- Advanced Features: api/optimizers/GEPA/GEPA_Advanced.md
- InferRules: api/optimizers/InferRules.md
- KNN: api/optimizers/KNN.md
- KNNFewShot: api/optimizers/KNNFewShot.md
@@ -243,6 +245,7 @@ plugins:
"deep-dive/optimizers/Ensemble.md": "api/optimizers/Ensemble.md"
"deep-dive/optimizers/LabeledFewShot.md": "api/optimizers/LabeledFewShot.md"
"deep-dive/optimizers/miprov2.md": "api/optimizers/MIPROv2.md"
"api/optimizers/GEPA/index.md": "api/optimizers/GEPA/overview.md"

"docs/quick-start/getting-started-01.md": "tutorials/rag/index.ipynb"
"docs/quick-start/getting-started-02.md": "tutorials/rag/index.ipynb"
30 changes: 22 additions & 8 deletions dspy/teleprompt/gepa/gepa.py
@@ -214,14 +214,28 @@ def metric(
a strong reflection model. Consider using `dspy.LM(model='gpt-5', temperature=1.0, max_tokens=32000)`
for optimal performance.
skip_perfect_score: Whether to skip examples with perfect scores during reflection. Default is True.
instruction_proposer: Optional custom instruction proposer implementing GEPA's ProposalFn protocol.
If provided, GEPA will use this custom proposer instead of its default instruction proposal
mechanism to generate improved instructions based on feedback from failed examples. This is
particularly useful when you need specialized instruction generation for multimodal inputs
(like dspy.Image) or custom types. Use `MultiModalInstructionProposer()` from
`dspy.teleprompt.gepa.instruction_proposal` for handling visual content. If None (default),
GEPA uses its built-in text-optimized proposer (see `gepa.strategies.instruction_proposal.InstructionProposalSignature`
for reference implementation).
instruction_proposer: Optional custom instruction proposer implementing GEPA's ProposalFn protocol.
**Default: None (recommended for most users)** - Uses GEPA's proven instruction proposer from
the [GEPA library](https://github.com/gepa-ai/gepa), which implements the
[`ProposalFn`](https://github.com/gepa-ai/gepa/blob/main/src/gepa/core/adapter.py). This default
proposer is highly capable and was validated across diverse experiments reported in the GEPA
paper and tutorials.

**Advanced Feature**: Only needed for specialized scenarios:
- **Multi-modal handling**: Processing dspy.Image inputs alongside textual information
- **Nuanced control over constraints**: Fine-grained control over instruction length, format,
and structural requirements beyond standard feedback mechanisms
- **Domain-specific knowledge injection**: Specialized terminology or context that cannot be
provided through feedback_func alone
- **Provider-specific prompting**: Optimizations for specific LLM providers (OpenAI, Anthropic)
with unique formatting preferences
- **Coupled component updates**: Coordinated updates of multiple components together rather
than independent optimization
- **External knowledge integration**: Runtime access to databases, APIs, or knowledge bases

The default proposer handles the vast majority of use cases effectively. Use
MultiModalInstructionProposer() from dspy.teleprompt.gepa.instruction_proposal for visual
content or implement custom ProposalFn for highly specialized requirements.

Note: When both instruction_proposer and reflection_lm are set, the instruction_proposer is called
in the reflection_lm context. However, reflection_lm is optional when using a custom instruction_proposer.
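For readers who do need the advanced path described in the docstring above, the sketch below shows roughly what a custom proposer looks like. It follows the `ProposalFn` call shape used by `MultiModalInstructionProposer.__call__` later in this diff (`candidate`, `reflective_dataset`, `components_to_update` → updated instruction texts); the `ProposeInstruction` signature, its field names, and the way feedback is flattened into a prompt are illustrative assumptions, not part of this PR.

```python
# Minimal sketch of a custom instruction proposer (illustrative; not part of this diff).
# GEPA invokes it under the reflection_lm context when both are provided.
import dspy
from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample


class ProposeInstruction(dspy.Signature):
    """Rewrite the instruction so it addresses the observed feedback."""

    current_instruction: str = dspy.InputField()
    examples_with_feedback: str = dspy.InputField()
    improved_instruction: str = dspy.OutputField()


class MyInstructionProposer:
    """Callable matching GEPA's ProposalFn protocol."""

    def __call__(
        self,
        candidate: dict[str, str],
        reflective_dataset: dict[str, list[ReflectiveExample]],
        components_to_update: list[str],
    ) -> dict[str, str]:
        propose = dspy.Predict(ProposeInstruction)
        updated: dict[str, str] = {}
        for name in components_to_update:
            # Each reflective example carries Inputs, Generated_Outputs, and Feedback.
            feedback_text = "\n\n".join(str(ex) for ex in reflective_dataset.get(name, []))
            result = propose(
                current_instruction=candidate[name],
                examples_with_feedback=feedback_text,
            )
            updated[name] = result.improved_instruction
        return updated
```

Passing `instruction_proposer=MyInstructionProposer()` to `dspy.GEPA` would swap out only the proposal step; the rest of the optimization loop is unchanged.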
20 changes: 16 additions & 4 deletions dspy/teleprompt/gepa/gepa_utils.py
@@ -1,6 +1,6 @@
import logging
import random
from typing import Any, Callable, Protocol
from typing import Any, Callable, Protocol, TypedDict

from gepa import EvaluationBatch, GEPAAdapter
from gepa.core.adapter import ProposalFn
@@ -24,6 +24,18 @@ def log(self, x: str):

DSPyTrace = list[tuple[Any, dict[str, Any], Prediction]]


class ReflectiveExample(TypedDict):
"""
Structure of individual examples in the reflective dataset.

Each example contains the predictor inputs, generated outputs, and feedback from evaluation.
"""
Inputs: dict[str, Any] # Predictor inputs (may include str, dspy.Image, etc.)
Generated_Outputs: dict[str, Any] | str # Success: dict with output fields, Failure: error message string
Feedback: str # Always a string - from metric function or parsing error message


class ScoreWithFeedback(Prediction):
score: float
feedback: str
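The new `ReflectiveExample` TypedDict pins down the shape of the entries that `make_reflective_dataset` (below) produces and that instruction proposers consume. A hypothetical entry, with invented values, might look like this:

```python
# Hypothetical entry in a reflective dataset (the values are invented for illustration).
from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample

example: ReflectiveExample = {
    "Inputs": {"question": "What is the capital of France?"},  # may include dspy.Image, etc.
    "Generated_Outputs": {"answer": "Paris"},  # an error-message string on failure
    "Feedback": "Correct answer, but the response should cite a source.",
}
```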
@@ -158,11 +170,11 @@ def evaluate(self, batch, candidate, capture_traces=False):
scores = [s["score"] if hasattr(s, "score") else s for s in scores]
return EvaluationBatch(outputs=outputs, scores=scores, trajectories=None)

def make_reflective_dataset(self, candidate, eval_batch, components_to_update):
def make_reflective_dataset(self, candidate, eval_batch, components_to_update) -> dict[str, list[ReflectiveExample]]:
from dspy.teleprompt.bootstrap_trace import FailedPrediction
program = self.build_program(candidate)

ret_d: dict[str, list[dict[str, Any]]] = {}
ret_d: dict[str, list[ReflectiveExample]] = {}
for pred_name in components_to_update:
module = None
for name, m in program.named_predictors():
@@ -171,7 +183,7 @@ def make_reflective_dataset(self, candidate, eval_batch, components_to_update):
break
assert module is not None

items: list[dict[str, Any]] = []
items: list[ReflectiveExample] = []
for data in eval_batch.trajectories or []:
trace = data["trace"]
example = data["example"]
11 changes: 6 additions & 5 deletions dspy/teleprompt/gepa/instruction_proposal.py
@@ -4,6 +4,7 @@

import dspy
from dspy.adapters.types.base_type import Type
from dspy.teleprompt.gepa.gepa_utils import ReflectiveExample


class GenerateEnhancedMultimodalInstructionFromFeedback(dspy.Signature):
@@ -54,7 +55,7 @@ def __init__(self):
super().__init__()
self.propose_instruction = dspy.Predict(GenerateEnhancedMultimodalInstructionFromFeedback)

def forward(self, current_instruction: str, reflective_dataset: list[dict[str, Any]]) -> str:
def forward(self, current_instruction: str, reflective_dataset: list[ReflectiveExample]) -> str:
"""
Generate an improved instruction based on current instruction and feedback examples.

@@ -84,7 +85,7 @@ def forward(self, current_instruction: str, reflective_dataset: list[dict[str, A
return result.improved_instruction

def _format_examples_with_pattern_analysis(
self, reflective_dataset: list[dict[str, Any]]
self, reflective_dataset: list[ReflectiveExample]
) -> tuple[str, dict[int, list[Type]]]:
"""
Format examples with pattern analysis and feedback categorization.
@@ -106,7 +107,7 @@

return formatted_examples, image_map

def _analyze_feedback_patterns(self, reflective_dataset: list[dict[str, Any]]) -> dict[str, Any]:
def _analyze_feedback_patterns(self, reflective_dataset: list[ReflectiveExample]) -> dict[str, Any]:
"""
Analyze feedback patterns to provide better context for instruction generation.

@@ -177,7 +178,7 @@ def _create_pattern_summary(self, feedback_analysis: dict[str, Any]) -> str:
return "\n".join(summary_parts)

def _format_examples_for_instruction_generation(
self, reflective_dataset: list[dict[str, Any]]
self, reflective_dataset: list[ReflectiveExample]
) -> tuple[str, dict[int, list[Type]]]:
"""
Format examples using GEPA's markdown structure while preserving image objects.
@@ -278,7 +279,7 @@ def __init__(self):
def __call__(
self,
candidate: dict[str, str],
reflective_dataset: dict[str, list[dict[str, Any]]],
reflective_dataset: dict[str, list[ReflectiveExample]],
components_to_update: list[str],
) -> dict[str, str]:
"""GEPA-compatible proposal function.
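To tie the pieces together, here is a hedged sketch of the multimodal path the gepa.py docstring recommends; `my_metric`, `program`, `trainset`, `valset`, and the metric-call budget are placeholders, not values taken from this PR.

```python
# Sketch: wiring MultiModalInstructionProposer into GEPA (placeholders marked below).
import dspy
from dspy.teleprompt.gepa.instruction_proposal import MultiModalInstructionProposer

optimizer = dspy.GEPA(
    metric=my_metric,  # placeholder: metric returning a score (and ideally textual feedback)
    reflection_lm=dspy.LM(model="gpt-5", temperature=1.0, max_tokens=32000),
    instruction_proposer=MultiModalInstructionProposer(),
    max_metric_calls=150,  # placeholder budget
)
optimized = optimizer.compile(program, trainset=trainset, valset=valset)
```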
62 changes: 62 additions & 0 deletions tests/teleprompt/test_gepa.py
@@ -414,3 +414,65 @@ def random_component_selector(state, trajectories, subsample_scores, candidate_i
result = optimizer.compile(student, trainset=trainset, valset=trainset)

assert result is not None, "Should work with custom random function selector"


def test_alternating_half_component_selector():
"""Test alternating half selector that optimizes different halves on even/odd iterations."""

selection_history = []

def alternating_half_selector(state, trajectories, subsample_scores, candidate_idx, candidate):
"""Optimize half the components on even iterations, half on odd iterations."""
components = list(candidate.keys())

# If there's only one component, always optimize it
if len(components) <= 1:
selected = components
else:
mid_point = len(components) // 2

# Use state.i (iteration counter) to alternate between halves
if state.i % 2 == 0:
# Even iteration: optimize first half
selected = components[:mid_point]
else:
# Odd iteration: optimize second half
selected = components[mid_point:]

# Track selections for verification
selection_history.append({
"iteration": state.i,
"selected": selected.copy(),
"all_components": components.copy()
})

return selected

student = MultiComponentModule() # Has "classifier" and "generator" components

# Provide enough responses for multiple iterations
task_lm = DummyLM([{"category": "test_category", "output": "test_output"}] * 20)
reflection_lm = DummyLM([{"improved_instruction": "Better instruction"}] * 10)
trainset = [dspy.Example(input="test", output="expected").with_inputs("input")]

with dspy.context(lm=task_lm):
optimizer = dspy.GEPA(
metric=component_selection_metric,
reflection_lm=reflection_lm,
max_metric_calls=8, # Allow multiple iterations
component_selector=alternating_half_selector,
)
result = optimizer.compile(student, trainset=trainset, valset=trainset)

assert result is not None, "Should work with alternating half selector"
assert len(selection_history) >= 2, "Should have made multiple selections"

for i, selection in enumerate(selection_history):
if selection["iteration"] % 2 == 0:
# Even iteration should select first half: ["classifier"]
assert "classifier" in selection["selected"], f"Even iteration {selection['iteration']} should include classifier"
assert "generator" not in selection["selected"], f"Even iteration {selection['iteration']} should not include generator"
else:
# Odd iteration should select second half: ["generator"]
assert "generator" in selection["selected"], f"Odd iteration {selection['iteration']} should include generator"
assert "classifier" not in selection["selected"], f"Odd iteration {selection['iteration']} should not include classifier"