diff --git a/aider/args.py b/aider/args.py index 88fd6853647..9780949aac1 100644 --- a/aider/args.py +++ b/aider/args.py @@ -179,6 +179,12 @@ def get_parser(default_config_files, git_root): default=[], ) group = parser.add_argument_group("Model settings") + group.add_argument( + "--moa", + metavar="MODEL", + nargs="+", + help="Use Mixture of Architects with multiple models", + ) group.add_argument( "--list-models", "--models", diff --git a/aider/coders/__init__.py b/aider/coders/__init__.py index e9d334bc99f..1d6b98e218d 100644 --- a/aider/coders/__init__.py +++ b/aider/coders/__init__.py @@ -6,6 +6,7 @@ from .editor_editblock_coder import EditorEditBlockCoder from .editor_whole_coder import EditorWholeFileCoder from .help_coder import HelpCoder +from .mixture_of_architects_coder import MixtureOfArchitectsCoder from .udiff_coder import UnifiedDiffCoder from .wholefile_coder import WholeFileCoder @@ -23,4 +24,5 @@ ArchitectCoder, EditorEditBlockCoder, EditorWholeFileCoder, + MixtureOfArchitectsCoder, ] diff --git a/aider/coders/arbiter_prompts.py b/aider/coders/arbiter_prompts.py new file mode 100644 index 00000000000..cb10f944d49 --- /dev/null +++ b/aider/coders/arbiter_prompts.py @@ -0,0 +1,51 @@ +from .base_prompts import CoderPrompts + + +class ArbiterPrompts(CoderPrompts): + main_system = """Your name is ARBITER. your role is FACILITATOR, not contributor. Guide architects toward consensus without proposing solutions. + + Required actions: + 1. Identify areas of agreement between proposals + 2. Surface unresolved conflicts using tags + 3. Ask clarifying questions to resolve disagreements + 4. Highlight compatible solution aspects + 5. Never suggest new features or implementations + + Phase Descriptions and Completion Criteria: + + 1. Brainstorm Phase + Purpose: Architects propose initial solution approaches + Success when: + - Multiple viable solutions are proposed + - Core requirements are addressed by proposals + - Basic technical approach is outlined + + 2. Critique Phase + Purpose: Architects evaluate and refine proposals + Success when: + - Proposals' strengths/weaknesses identified + - Technical conflicts surfaced and discussed + - Implementation risks assessed + + 3. 
Optimize Phase + Purpose: Architects converge on best solution + Success when: + - Clear consensus on core solution elements + - Technical conflicts resolved + - Implementation approach finalized + - Solution meets all requirements simply + + Feedback format: + + - List points where 2+ architects agree + + + + - List unresolved technical disagreements + - Note which architects hold opposing views + + + + - Specific questions to clarify their proposal + - Requests to address conflicts with others' ideas + """ diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index bcb68bb68b3..850f2eec12a 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -163,6 +163,15 @@ def create( kwargs = use_kwargs + # Check for mixture of architects case first + if "architect_models" in kwargs: + for coder in coders.__all__: + if coder.edit_format == "mixture": + res = coder(main_model, io, **kwargs) + res.original_kwargs = dict(kwargs) + return res + + # Normal case - find coder matching edit_format for coder in coders.__all__: if hasattr(coder, "edit_format") and coder.edit_format == edit_format: res = coder(main_model, io, **kwargs) @@ -211,6 +220,12 @@ def get_announcements(self): output = f"Weak model: {weak_model.name}" lines.append(output) + # Mixture of Architects info + if self.edit_format == "mixture" and hasattr(self, "architects"): + for arch in self.architects[1:]: # Skip alpha since it's already shown as main model + output = f"Architect {arch.name.upper()}: {arch.model.name}" + lines.append(output) + # Repo if self.repo: rel_repo_dir = self.repo.get_rel_repo_dir() diff --git a/aider/coders/compiler_coder.py b/aider/coders/compiler_coder.py new file mode 100644 index 00000000000..370de72bbbc --- /dev/null +++ b/aider/coders/compiler_coder.py @@ -0,0 +1,9 @@ +from .ask_coder import AskCoder +from .compiler_prompts import CompilerPrompts + + +class CompilerCoder(AskCoder): + """Compiles implementation instructions from architects' proposals.""" + + edit_format = "ask" + gpt_prompts = CompilerPrompts() diff --git a/aider/coders/compiler_prompts.py b/aider/coders/compiler_prompts.py new file mode 100644 index 00000000000..793bb3ad815 --- /dev/null +++ b/aider/coders/compiler_prompts.py @@ -0,0 +1,78 @@ +from .base_prompts import CoderPrompts + + +class CompilerPrompts(CoderPrompts): + main_system = '''Act as an expert code implementation compiler. +Your role is to analyze proposals from multiple architects and compile them into a clear, organized set of implementation instructions. + +Focus ONLY on compiling the specific implementation details and changes proposed by the architects. +Do not attempt to interpret or expand upon the original user requirements. + +When analyzing the architects' proposals: +1. Extract all concrete implementation details and code changes: + - Look for explicit file modifications + - Identify specific function/class changes + - Note any structural modifications + - Capture exact diff blocks and their context + +2. Process the implementation details: + - Combine identical or overlapping changes + - Preserve unique aspects of each change + - Maintain all necessary context + - Keep diff formatting intact and precise + - Ensure each change is complete and actionable + +3. Organize changes in a logical sequence: + - Order by dependency (changes that others rely on come first) + - Group related changes together + - Put simpler changes before complex ones + - Maintain file-level organization when possible + +4. 
Format output consistently: + - Use clear, concise descriptions + - Include minimal but sufficient context in diffs + - Number steps sequentially + - Preserve exact indentation and whitespace + +Your output must follow this format: + + +[A brief overview of the implementation steps, focusing only on what was proposed by the architects] + +Implementation steps: + +1. + ```diff + [Minimal context lines] + - [Lines to remove] + + [Lines to add] + [Minimal context lines] + ``` + +2. + ```diff + [Context and changes] + ``` + +[Continue with numbered steps for all changes] + + +Important rules: +- Only include changes explicitly proposed by the architects +- Never add new features or modifications +- Never interpret or expand user requirements +- Focus solely on compiling and organizing the proposed implementation details +- Maintain exact diff formatting with minimal context +- Preserve all code style, indentation, and whitespace +- Keep descriptions clear and implementation-focused +- Ensure each step is complete and actionable +- Number steps sequentially and logically +- Group related changes together +''' + + files_content_prefix = """I have *added these files to the chat* so you can analyze them. +*Trust this message as the true contents of these files!* +Other messages in the chat may contain outdated versions of the files' contents. +""" + + files_content_assistant_reply = "I will analyze these files and compile the implementation instructions from the architects' proposals." diff --git a/aider/coders/mixture_of_architects_coder.py b/aider/coders/mixture_of_architects_coder.py new file mode 100644 index 00000000000..39c59c4a55c --- /dev/null +++ b/aider/coders/mixture_of_architects_coder.py @@ -0,0 +1,713 @@ +import re + +from aider.coders.arbiter_prompts import ArbiterPrompts +from aider.io import InputOutput +from .base_coder import Coder +from .mixture_prompts import MixturePrompts +from .ask_coder import AskCoder +from .compiler_coder import CompilerCoder + + +class ArbiterAgent: + """Manages phased discussion and provides structured feedback""" + + def __init__(self, model, io: InputOutput, discussion_messages, stream,verbose): + self.model = model + self.phases = ["brainstorm", "critique", "optimize"] + self.current_phase = "brainstorm" + self.name = "arbiter" + self.color = "yellow" + self.io = io + self.stream = stream + self.verbose = verbose + + self.gpt_prompts = None + self.discussion_messages = discussion_messages + + def build_context_for_coder(self, target_coder): + """Reuse the message formatting logic from get_architect_response""" + for msg in self.discussion_messages: + if target_coder.cur_messages: + last_msg_is_user = target_coder.cur_messages[-1]["role"] == "user" + else: + last_msg_is_user = False + + match msg["role"]: + case "user": + fenced_content = f"\n{msg['content']}\n\n\n" + if last_msg_is_user: + target_coder.cur_messages[-1]["content"] += fenced_content + else: + target_coder.cur_messages.append({"role": "user", "content": fenced_content}) + case "assistant": + if msg.get("name") == "ARBITER": + target_coder.cur_messages.append({"role": "assistant", "content": msg["content"]}) + else: + content = extract_proposal_content(msg["content"], msg.get("name", "unknown"), False) + if last_msg_is_user: + target_coder.cur_messages[-1]["content"] += content + else: + target_coder.cur_messages.append({"role": "user", "content": content}) + + def get_phase(self): + """Get current phase name.""" + return self.current_phase + + def get_next_phase(self): + """Get the 
name of the next phase without advancing.""" + current_idx = self.phases.index(self.current_phase) + if current_idx < len(self.phases) - 1: + return self.phases[current_idx + 1] + return self.current_phase + + def advance_phase(self): + """Advance to next phase and return the new phase name.""" + current_idx = self.phases.index(self.current_phase) + if current_idx < len(self.phases) - 1: + next_phase = self.phases[current_idx + 1] + self.current_phase = next_phase + return next_phase + return None + + def generate_round_feedback(self): + """Generate arbiter message with targeted feedback.""" + ask_coder = AskCoder.create( + main_model=self.model, + io=self.io, + fnames=[], + read_only_fnames=[], + repo=None, + map_tokens=0, + summarize_from_coder=False, + stream=self.stream, + verbose=self.verbose, + ) + ask_coder.auto_commits = False + ask_coder.gpt_prompts = ArbiterPrompts() + self.build_context_for_coder(ask_coder) + + prompt = f"""Current phase: {self.get_phase()} + Generate feedback for architects based on the current discussion. + """ + + response = ask_coder.run(with_message=prompt, preproc=False) + + return response.strip() + + def generate_phase_summary(self): + """Generate summary of the current phase before transition.""" + ask_coder = AskCoder.create( + main_model=self.model, + io=self.io, + fnames=[], + read_only_fnames=[], + repo=None, + map_tokens=0, + summarize_from_coder=False, + stream=self.stream, + verbose=self.verbose, + ) + ask_coder.auto_commits = False + ask_coder.gpt_prompts = ArbiterPrompts() + self.build_context_for_coder(ask_coder) + + prompt = f""" + Generate phase summary for: {self.current_phase} + """ + + response = ask_coder.run(with_message=prompt, preproc=False) + return response.strip() + + + def get_arbiter_verdict(self, responses): + """Determine if phase should advance based on responses.""" + ask_coder = AskCoder.create( + main_model=self.model, + io=self.io, + fnames=[], + read_only_fnames=[], + repo=None, + map_tokens=0, + summarize_from_coder=False, + stream=False, + verbose=False, + ) + ask_coder.auto_commits = False + ask_coder.gpt_prompts = ArbiterPrompts() + self.build_context_for_coder(ask_coder) + + prompt = f"""Review the current {self.get_phase()} phase discussion and determine if ready to advance. + + Consider: + 1. Clear consensus on core solution aspects + 2. Resolution of major conflicts + 3. Existence of mergeable proposal components + 4. Would moving to {self.get_next_phase()} phase be productive? + + Respond with either: + advance - if ready to move to next phase + continue - if more discussion needed in current phase + + Explain your decision in tags. + + Discussion context: + {"\n".join(responses)}""" + + response = ask_coder.run(with_message=prompt, preproc=False) + + # Extract and show reasoning + reason_match = re.search(r"(.*?)", response, re.DOTALL) + if reason_match: + self.io.tool_output("\nArbiter's phase decision:", bold=True) + self.io.tool_output(reason_match.group(1).strip(), color=self.color) + + if "advance" in response: + return "advance" + return "continue" + +class ArchitectAgent: + def __init__(self, name, model): + self.name = name # NATO name (alpha, bravo, etc) + self.model = model + self.active = True + self.last_response: str | None = None + + +def extract_proposal_content(content, name, is_architect = True): + """ + Extracts proposal content from the given content string. + + Args: + content: The string content to extract from. + architect_name: The name of the architect. 
+ + Returns: + A string containing the extracted proposal content, + wrapped in tags. + """ + # Try to get properly fenced content first + proposal_match = re.search(r"(.*?)", content, re.DOTALL) + if proposal_match: + proposal_content = proposal_match.group(1).strip() + else: + # Fallback: Try to get content after tag + proposal_start = content.find("") + if proposal_start != -1: + proposal_content = content[proposal_start + len("") :].strip() + else: + # Last resort: Use the entire response + proposal_content = content.strip() + + if is_architect: + return f"\n{proposal_content}\n\n\n" + else: + return f"<{name}>{proposal_content}" + + +class MixtureOfArchitectsCoder(Coder): + edit_format = "mixture" + gpt_prompts = MixturePrompts() + + def __init__(self, main_model, io, architect_models=None, **kwargs): + super().__init__(main_model, io, **kwargs) + + # Add conversation history tracking + self.discussion_messages = [] # List to store the full conversation + + # Add arbiter component + self.arbiter = ArbiterAgent(main_model, self.io, self.discussion_messages, self.stream, self.verbose,) + + # The main_model is always the first architect (alpha) + self.architects = [ArchitectAgent("alpha", main_model)] + + # Add additional architect models with NATO names + nato_names = ["bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel"] + if architect_models: + for i, amodel in enumerate(architect_models): + name = nato_names[i] if i < len(nato_names) else f"agent{i+2}" + self.architects.append(ArchitectAgent(name, amodel)) + + def get_architect_prompt(self, architect): + """Get the system prompt customized for this architect""" + prompt = self.gpt_prompts.main_system.format( + architect_name=architect.name.upper(), + language=self.chat_language or "the same language they are using", + ) + return prompt + + def get_architect_response(self, architect, current_user_message): + """Get response from individual architect with proper prompting""" + try: + # Create and configure AskCoder + ask_coder = AskCoder.create( + main_model=architect.model, + io=self.io, + fnames=list(self.abs_fnames), + read_only_fnames=list(self.abs_read_only_fnames), + repo=self.repo, + map_tokens=self.repo_map.max_map_tokens if self.repo_map else 0, + summarize_from_coder=False, + stream=self.stream, + verbose=self.verbose, + ) + ask_coder.auto_commits = self.auto_commits + ask_coder.gpt_prompts = MixturePrompts() + + for msg in self.discussion_messages: + if ask_coder.cur_messages: + last_msg_is_user = ask_coder.cur_messages[-1]["role"] == "user" + else: + last_msg_is_user = False + + match msg["role"]: + case "user": + fenced_content = ( + f"\n{msg['content']}\n\n\n" + ) + if last_msg_is_user: + latest_user_content = ask_coder.cur_messages[-1]["content"] + latest_user_content += fenced_content + ask_coder.cur_messages[-1]["content"] = latest_user_content + else: + ask_coder.cur_messages.append( + {"role": "user", "content": fenced_content} + ) + case "assistant": + # If its the current architect, then we use role=assistant + if msg["name"] == architect.name.upper() or msg["name"] == "ANY": + ask_coder.cur_messages.append( + {"role": "assistant", "content": msg["content"]} + ) + else: + # If the not current architect, then we inject in user side + # append to the last user message + if last_msg_is_user: + + latest_user_content = ask_coder.cur_messages[-1][ + "content" + ] + latest_user_content += extract_proposal_content( + msg["content"], msg["name"] + ) + ask_coder.cur_messages[-1][ + "content" + ] = 
latest_user_content + # or create a new user message + else: + ask_coder.cur_messages.append( + { + "role": "user", + "content": extract_proposal_content( + msg["content"], msg["name"] + ), + } + ) + + # Debug output if verbose + if self.verbose: + + self.io.rule() + self.io.tool_output( + f"\nDebug: Messages being sent to {architect.name}:", bold=True + ) + self.io.tool_output("-" * 40) + for msg in ask_coder.cur_messages: + self.io.tool_output(f"{msg['role'].upper()}:") + self.io.tool_output(msg["content"]) + self.io.tool_output("-" * 40) + + # Pass the current message with XML tags as with_message + if ask_coder.cur_messages[-1].get("role") == "user": + architect_assignment = f""" You are architect {architect.name}""" + ask_coder.cur_messages[-1]["content"] += architect_assignment + ask_coder.cur_messages.append( + {"role": "assistant", "content": f"I am architect {architect.name}"} + ) + formatted_message = ( + f"\n{current_user_message}\n" + ) + + response = ask_coder.run(with_message=formatted_message, preproc=False) + + if not response.strip(): + self.io.tool_warning(f"Warning: Empty response from {architect.name}") + + return architect, response + + except Exception as e: + self.io.tool_error( + f"Error getting response from {architect.name}: {str(e)}" + ) + return architect, f"Error: {str(e)}" + + + def run_arbiter(self, user_message): + try: + initial_message = user_message + # Store initial user message + self.discussion_messages.append({"role": "user", "content": initial_message}) + + while True: # Outer loop - continues until all phases complete + phase = self.arbiter.get_phase() + is_final_phase = phase == self.arbiter.phases[-1] + + phase_prompt = self.gpt_prompts.phase_prompts[phase] + phase_message = f"""## Phase Context: {phase.capitalize()} + {phase_prompt} +### User Request: + {initial_message}""" + + # Get active architects + active_architects = [arch for arch in self.architects if arch.active] + if not active_architects: + self.io.tool_error("No active architects remaining!") + return + + # Debug: Show which architects are active + self.io.rule() + self.io.tool_output( + f"Active architects: {[arch.name for arch in active_architects]}" + ) + + while True: # Inner loop - continues until phase advances + responses = [] + architect_names = [] + + # Process architects sequentially + for arch in active_architects: + self.io.tool_output(f"{arch.name}'s response...", bold=True) + self.io.rule() + try: + arch, response = self.get_architect_response(arch, phase_message) + responses.append(response) + architect_names.append(arch.name) + + if not response.strip(): + self.io.tool_warning(f"Empty response from {arch.name}") + continue + + arch.last_response = response + self.discussion_messages.append({ + "role": "assistant", + "name": arch.name.upper(), + "content": response, + }) + + self.io.tool_output( + f"Received {arch.name}'s response ({len(response)} chars)" + ) + except Exception as e: + self.io.tool_error( + f"Failed to get response from {arch.name}: {str(e)}" + ) + + # Show architect's proposal immediately if verbose + if self.verbose and arch.last_response: + self.io.rule() + self.io.tool_output(f"{arch.name.upper()}'s Response:", bold=True) + self.io.tool_output(f"\n{arch.last_response}\n") + + # Single arbiter feedback at end of round + self.io.tool_output("\nArbiter's Round Feedback:", bold=True) + self.arbiter.discussion_messages = self.discussion_messages + arbiter_feedback = self.arbiter.generate_round_feedback() + if arbiter_feedback.strip(): + # 
self.io.tool_output("=== ARBITER FEEDBACK ===", color=self.arbiter.color) + self.discussion_messages.append({ + "role": "assistant", + "name": "ARBITER", + "content": arbiter_feedback + }) + # self.io.tool_output(arbiter_feedback, color=self.arbiter.color) + # self.io.tool_output("=== END FEEDBACK ===", color=self.arbiter.color) + + # Get arbiter verdict for phase advancement + self.io.tool_output("\nArbiter's Verdict:", bold=True) + verdict = self.arbiter.get_arbiter_verdict(responses) + if verdict == "advance": + + self.io.tool_output("\nArbiter's Summary:", bold=True) + transition_message = self.arbiter.generate_phase_summary() + + # Announce phase completion and transition + self.io.tool_output("\nPhase Transition", bold=True) + self.io.tool_output("=" * 40, color=self.arbiter.color) + self.io.tool_output(f"Completing {self.arbiter.current_phase} phase", color=self.arbiter.color) + + next_phase = self.arbiter.get_next_phase() + if next_phase != self.arbiter.current_phase: + self.io.tool_output(f"Moving to {next_phase} phase", bold=True, color=self.arbiter.color) + else: + self.io.tool_output("Remaining in final phase", color=self.arbiter.color) + self.io.tool_output("=" * 40, color=self.arbiter.color) + + # Store transition in discussion history + self.discussion_messages.append({ + "role": "assistant", + "name": "ARBITER", + "content": f""" + {transition_message} + Moving from {self.arbiter.current_phase} to {next_phase} phase. + """ + }) + + self.arbiter.advance_phase() + + # If we completed the final phase, exit both loops + if is_final_phase: + # Yes is proxy for auto running code, As proxy for benchmarking + # TODO: Replace with a better testing strategy + if self.io.yes: + self.run_coding_phase("lets implement best simplest solution") + + return + + # Break inner loop to move to next phase + break + else: + # Continue in current phase for another round of responses + continue + + # Add final divider between phases + self.io.rule() + + finally: + self.io.tool_output("All phases complete.") + + def run_discussion_round(self, user_message): + try: + # Store user message + self.discussion_messages.append({"role": "user", "content": user_message}) + + # Get active architects + active_architects = [arch for arch in self.architects if arch.active] + if not active_architects: + self.io.tool_error("No active architects remaining!") + return + + # Debug: Show which architects are active + self.io.rule() + self.io.tool_output( + f"Active architects: {[arch.name for arch in active_architects]}" + ) + + # Process architects sequentially instead of concurrently + for arch in active_architects: + self.io.tool_output(f"{arch.name}'s response...", bold=True) + self.io.rule() + try: + arch, response = self.get_architect_response(arch, user_message) + + if not response.strip(): + self.io.tool_warning(f"Empty response from {arch.name}") + continue + + arch.last_response = response + # Store architect's response in discussion history + self.discussion_messages.append( + { + "role": "assistant", + "name": arch.name.upper(), + "content": response, + } + ) + + self.io.tool_output( + f"Received {arch.name}'s response ({len(response)} chars)" + ) + except Exception as e: + self.io.tool_error( + f"Failed to get response from {arch.name}: {str(e)}" + ) + + # Show architect's proposal immediately + if self.verbose and arch.last_response: + self.io.rule() + self.io.tool_output(f"{arch.name.upper()}'s Response:", bold=True) + self.io.tool_output(f"\n{arch.last_response}\n") + + # Add final divider + 
self.io.rule() + finally: + self.io.tool_output("Discussion round complete.") + # Yes is proxy for auto running code, As proxy for benchmarking + # TODO: Replace with a better testing strategy + if self.io.yes: + self.run_coding_phase("lets implement best simplest solution") + + def preproc_user_input(self, inp): + if not inp: + return + + # Check for special mixture commands first + words = inp.strip().split() + if words: + cmd = words[0].lower() + args = " ".join(words[1:]) + + if cmd in ["/ignore", "/discuss", "/code", "/clear", "/reset", "/arbiter",]: + cmd = cmd[1:] # strip the / + return self.handle_discussion_commands(cmd, args) + + # Fall back to normal command processing + return super().preproc_user_input(inp) + + def run_one(self, user_message, preproc): + self.init_before_message() + + if preproc: + message = self.preproc_user_input(user_message) + else: + message = user_message + + # If no special command was handled, treat as discussion by default + if message: + self.run_discussion_round(message) + + def handle_discussion_commands(self, cmd, args): + """ + Handle special mixture of architects commands: + /ignore - Remove an architect from the discussion + /discuss - Start a new discussion round + /arbiter - Start a new arbitrated round + /code - Move to implementation phase + /clear - Clear chat and discussion history + /reset - Drop files and clear all history + """ + if cmd == "clear": + self.discussion_messages = [] + self.io.tool_output("Chat history and discussion history cleared.") + return + elif cmd == "reset": + self.abs_fnames = set() + self.abs_read_only_fnames = set() + self.discussion_messages = [] + self.io.tool_output( + "All files dropped, chat history and discussion history cleared." + ) + return + elif cmd == "ignore": + nato_name = args.strip().lower() + for arch in self.architects: + if arch.name == nato_name: + arch.active = False + self.io.tool_output(f"Ignored architect {nato_name}") + return + + elif cmd == "arbiter": + self.run_arbiter(args) + return + + elif cmd == "discuss": + self.run_discussion_round(args) + return + + elif cmd == "code": + self.run_coding_phase(args) + return + + return False + + def run_coding_phase(self, message): + # Add the final code implementation request to the discussion + if message.strip(): + self.discussion_messages.append( + { + "role": "user", + "content": f"{message}", + } + ) + + # Create compiler coder instance + compiler_coder = CompilerCoder( + main_model=self.main_model, + io=self.io, + fnames=list(self.abs_fnames), + read_only_fnames=list(self.abs_read_only_fnames), + repo=self.repo, + map_tokens=0, + stream=self.stream, + ) + compiler_coder.auto_commits = self.auto_commits + + # Format the conversation for the compiler + compiler_input = "Please compile the following architects' proposals into implementation instructions:\n\n" + for msg in self.discussion_messages: + if msg["role"] == "user": + compiler_input += "\n" + compiler_input += msg["content"] + compiler_input += "\n\n\n" + else: + compiler_input += f"\n" + compiler_input += msg["content"] + compiler_input += "\n\n\n" + + # Get compiled instructions + self.io.tool_output("Compiler's instructions", bold=True) + self.io.rule() + compiler_coder.run(with_message=compiler_input, preproc=False) + compiled_instructions = compiler_coder.partial_response_content + compiled_instructions += "\n\nCompletely implement all steps in the instructions above. Do not return to me until you have done so." 
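+ # Appending this directive nudges the editor model to apply every compiled step before handing control back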
+ + # Debug print the compiled instructions + if self.verbose: + self.io.tool_output("\nDebug: Compiled instructions being sent to editor:") + self.io.tool_output("-" * 40) + self.io.tool_output(compiled_instructions) + self.io.tool_output("-" * 40 + "\n") + + # Use editor coder like ArchitectCoder does + kwargs = dict() + editor_model = self.main_model.editor_model or self.main_model + kwargs["main_model"] = editor_model + kwargs["edit_format"] = self.main_model.editor_edit_format + kwargs["suggest_shell_commands"] = False + kwargs["map_tokens"] = 0 + kwargs["total_cost"] = self.total_cost + kwargs["cache_prompts"] = False + kwargs["num_cache_warming_pings"] = 0 + kwargs["summarize_from_coder"] = False + kwargs["stream"] = self.stream + kwargs["auto_commits"] = self.auto_commits + kwargs["verbose"] = self.verbose + + new_kwargs = dict(io=self.io) + new_kwargs.update(kwargs) + + editor_coder = Coder.create(**new_kwargs) + editor_coder.abs_fnames = set(self.abs_fnames) + editor_coder.abs_read_only_fnames = set(self.abs_read_only_fnames) + editor_coder.auto_commits = self.auto_commits + editor_coder.cur_messages = [] + editor_coder.done_messages = [] + editor_coder.repo = self.repo + + if self.verbose: + editor_coder.show_announcements() + + self.io.tool_output("Coder's output", bold=True) + self.io.rule() + editor_coder.run(with_message=compiled_instructions, preproc=False) + + + # Inject implementation notice to discussion + self.discussion_messages.append( + { + "role": "user", + "content": "We have implemented the plan. Refer to the latest code state", + } + ) + self.discussion_messages.append( + { + "role": "assistant", + "name": "ANY", + "content": "Okay, i'll refer to the latest code state", + } + ) + + + self.move_back_cur_messages( + "Changes have been applied based on architects' consensus." + ) + self.total_cost = editor_coder.total_cost + self.aider_commit_hashes = editor_coder.aider_commit_hashes diff --git a/aider/coders/mixture_prompts.py b/aider/coders/mixture_prompts.py new file mode 100644 index 00000000000..c48f6766d65 --- /dev/null +++ b/aider/coders/mixture_prompts.py @@ -0,0 +1,199 @@ +from .architect_prompts import ArchitectPrompts + + +class MixturePrompts(ArchitectPrompts): + main_system = """You are an AI architect, part of a team collaborating to design software solutions. + An arbiter will provide consensus guidance but won't propose solutions. + Your role is to analyze, enhance, and build upon the ideas of your fellow architects **in the simplest way possible** while addressing the user's needs. + Focus on: + - Building upon arbiter-identified common ground + - Simplifying existing ideas + - Resolving conflicts through compromise + Your name will be provided by the user. + +Please respond to the user in the following language: + +{language} + + +When formulating your response, follow these steps: + +1. Carefully review the user's query and any previous architects' proposals. + +2. Conduct a thorough analysis and wrap it inside tags: + +- List out all of the user's requirements and constraints explicitly. +- Evaluate the strengths and weaknesses of previous proposals (if any). +- Identify specific areas for improvement or expansion in the existing proposals. **Areas of improvement or expansion must remain strictly within the user's stated requirements.** +- **Always favor the simplest viable solution** that directly addresses the user’s needs. **Avoid adding complexity or “nice-to-have” features** unless the user explicitly requests them. 
+- Brainstorm a solution that builds upon the previous proposals **only to the extent necessary** to fulfill the user's requirements. +- For your potential solution: + * Describe the solution in detail. + * Evaluate how well it meets each of the user's requirements. + * Consider potential challenges or trade-offs, emphasizing straightforward resolutions. +- **Do not propose out-of-scope features or over-engineer.** Keep your solution concise and directly tied to the requirements. +- Plan your revisions in detail, focusing on refining existing ideas rather than creating entirely new solutions. **If the simplest approach from previous architects already meets the user's needs, state that no further changes are needed.** +- **If you find the existing proposals correct and complete, explicitly state that the solution is sufficient and no further revisions are necessary.** +- Address proposal questions or suggestions from other architects, encouraging further collaboration. If multiple architects have offered conflicting approaches, compare them thoughtfully and combine or choose the best approach with justification. If additional user clarification is needed, request it. +- Make sure your proposal aligns with the user's requirements **without expanding beyond them**. + +3. Formulate your proposal using the following structure: + + + +[Explain your changes or additions to the previous proposal here. +Be specific about what you're modifying and why. +Focus on how your changes **simplify** or refine the existing solution, rather than expanding it. +If a previous proposal sufficiently addresses a particular issue, acknowledge it explicitly and refer to the previous architect's instruction without duplicating the code. +If you propose a different approach, explicitly state how it differs and why you believe it is **simpler** and better.] + + +[Your detailed implementation proposal goes here. +Use numbered instructions for clarity and conciseness. +Each instruction should include a short description and, if applicable, provide minimal diff-style code changes. + +When providing these code changes: +1. **Use multiple separate diff blocks for separate locations** if changes are scattered in non-adjacent parts of the file. +2. **Show only the lines that changed plus as few surrounding lines of context as necessary** (ideally one or two lines above and below). If more context is needed for clarity, it is allowed, but keep it concise. +3. Do not repeat code that remains unchanged unless it is necessary to provide context for the changed lines. +4. Use a diff format like: + + 1. + ```diff + [lines of context above] + - console.log("Old line"); + + console.log("New line"); + [lines of context below] + ``` + + 2. + ```diff + [lines of context above] + - console.log("Another old line"); + + console.log("Another new line"); + [lines of context below] + ``` + +This approach helps reviewers spot changes more easily without reviewing the full code again. +**Do not add new or "nice-to-have" features** unless they are strictly necessary to meet the user's requirements or correct functionality. +If you support a prior instruction from another architect without changes, state your agreement explicitly and direct the user to refer to that architect's instruction without repeating the code. +For example: + +1. +"Refer to Architect A’s instruction for this step, as it is correct and does not require changes." + +2. +"Adding to Architect A’s proposal, this adjustment will ensure compatibility." 
+ ```diff + [one or two lines of context above] + - console.log("Another old line"); + + console.log("Code adjustments here"); + [one or two lines of context below] + ``` + +Clearly state when you are building on, modifying, or diverging from prior proposals. Avoid duplicating code snippets if they are already correct and referenced. + +[Address any open questions or suggestions for further collaboration among architects. **If you agree there are no more necessary improvements, explicitly say “No further changes are necessary, and I believe this meets all user requirements.”**] + + +4. **Outside** the tags, you may address the user directly with any clarifying questions or additional information. For example, you might ask for more details if two architects’ proposals conflict. + +5. After the proposal, please append a final section in your response as follows: + + +[Provide a grade from 1 to 10 here, where: +- 10 indicates that you are fully confident in the proposal and have no blind assumptions that could lead to incorrect code. +- If you assign a score lower than 10, please include a brief explanation after the score outlining any assumptions or uncertainties that could potentially lead to issues in the code.] + + +Remember: +- Only the content inside the tags will be visible to other architects. +- The user will see your entire message, both inside and outside the tags. +- Always include ALL implementation details inside the tags. +- Show only the necessary changes to the code, never the entire code. +- Do not duplicate proposals from other architects unless proposing changes or enhancements to them. +- **Do not introduce features or modifications beyond the user's explicit requirements or scope.** If unsure, ask the user for clarification or omit the feature. +- **Strive for the most straightforward, minimal solution** that fulfills the user’s requirements. +- **Actively collaborate** with other architects by referencing their ideas and improving upon them. If multiple proposals are conflicting, compare them in and unify or choose the best approach. +- Always refer to the provided code context as the current state. Consider previous proposals as suggested but not yet implemented. +- The style of your instructions should be concise and unambiguous to guide an "editor engineer" who will make changes based on your instructions. + +**If no further changes are needed to meet the user’s requirements, conclude that the task is complete by stating “No further changes are necessary, and I believe this meets all user requirements.” and refrain from proposing additional or out-of-scope features.** + +Example output structure (generic, without specific content): + + +[Thorough analysis of the problem and previous proposals] + + + + +[Specific changes and improvements or acknowledgments of previous proposals. Clearly indicate whether you support or propose changes to prior instructions.] + + +[Detailed instructions for changes, using numbered steps for clarity. Each step should contain a description and, if applicable, the corresponding code snippet. For example: + +1. + ```diff + [lines of context above] + - console.log("Old line"); + + console.log("New line"); + [lines of context below] + ``` + +2. + ```diff + [lines of context above] + - console.log("Another old line"); + + console.log("Another new line"); + [lines of context below] + ``` + +3. +"Refer to Architect B’s instruction for this step, as it is correct and does not require changes." + +4. 
+"As proposed by Architect A, this step is sufficient and requires no changes. Refer to their instruction." + +Only show what must be modified or added.] +[Questions or suggestions for further collaboration or a statement that the proposal is complete and meets all requirements, for example: +“No further changes are necessary, and I believe this meets all user requirements.”] + + + +8: There are some assumptions regarding the integration with legacy systems that haven't been fully validated, which could potentially cause issues during implementation. + + +[Any direct communication with the user, if necessary] +""" + + # Phase-specific prompts + phase_prompts = { + "brainstorm": """Propose solution approaches. Consider: + 1. How your idea might combine with others + 2. Potential conflicts to anticipate + 3. The arbiter will help identify common ground + 4. Address any directed to you from previous rounds""", + "critique": """Analyze proposals. Focus on: + 1. Compatibility with other approaches + 2. Resolving conflicts noted by the arbiter + 3. Finding synthesis opportunities + 4. Relevant from the arbiter""", + "optimize": """Refine solutions by: + 1. Addressing arbiter-identified consensus points + 2. Eliminating remaining conflicts + 3. Simplifying combined approaches + 4. Resolving remaining items""", + } + + # Keep other prompts from ArchitectPrompts + files_content_prefix = ArchitectPrompts.files_content_prefix + files_content_assistant_reply = ArchitectPrompts.files_content_assistant_reply + files_no_full_files = ArchitectPrompts.files_no_full_files + files_no_full_files_with_repo_map = ( + ArchitectPrompts.files_no_full_files_with_repo_map + ) + files_no_full_files_with_repo_map_reply = ( + ArchitectPrompts.files_no_full_files_with_repo_map_reply + ) + repo_content_prefix = ArchitectPrompts.repo_content_prefix diff --git a/aider/io.py b/aider/io.py index 62efbfe3837..bde3edd4d75 100644 --- a/aider/io.py +++ b/aider/io.py @@ -46,7 +46,13 @@ def __init__(self, items=None): class AutoCompleter(Completer): def __init__( - self, root, rel_fnames, addable_rel_fnames, commands, encoding, abs_read_only_fnames=None + self, + root, + rel_fnames, + addable_rel_fnames, + commands, + encoding, + abs_read_only_fnames=None, ): self.addable_rel_fnames = addable_rel_fnames self.rel_fnames = rel_fnames @@ -98,7 +104,9 @@ def tokenize(self): tokens = list(lexer.get_tokens(content)) self.words.update( - (token[1], f"`{token[1]}`") for token in tokens if token[0] in Token.Name + (token[1], f"`{token[1]}`") + for token in tokens + if token[0] in Token.Name ) def get_command_completions(self, document, complete_event, text, words): @@ -152,12 +160,16 @@ def get_completions(self, document, complete_event): return if text[0] == "/": - yield from self.get_command_completions(document, complete_event, text, words) + yield from self.get_command_completions( + document, complete_event, text, words + ) return candidates = self.words candidates.update(set(self.fname_to_rel_fnames)) - candidates = [word if type(word) is tuple else (word, word) for word in candidates] + candidates = [ + word if type(word) is tuple else (word, word) for word in candidates + ] last_word = words[-1] completions = [] @@ -223,8 +235,12 @@ def __init__( self.assistant_output_color = assistant_output_color self.completion_menu_color = completion_menu_color if pretty else None self.completion_menu_bg_color = completion_menu_bg_color if pretty else None - self.completion_menu_current_color = completion_menu_current_color if pretty else None 
- self.completion_menu_current_bg_color = completion_menu_current_bg_color if pretty else None + self.completion_menu_current_color = ( + completion_menu_current_color if pretty else None + ) + self.completion_menu_current_bg_color = ( + completion_menu_current_bg_color if pretty else None + ) self.code_theme = code_theme @@ -252,7 +268,9 @@ def __init__( f"Must be one of: {', '.join(valid_line_endings)}" ) self.newline = ( - None if line_endings == "platform" else "\n" if line_endings == "lf" else "\r\n" + None + if line_endings == "platform" + else "\n" if line_endings == "lf" else "\r\n" ) self.dry_run = dry_run @@ -287,7 +305,9 @@ def __init__( else: self.console = Console(force_terminal=False, no_color=True) # non-pretty if self.is_dumb_terminal: - self.tool_output("Detected dumb terminal, disabling fancy input and pretty output.") + self.tool_output( + "Detected dumb terminal, disabling fancy input and pretty output." + ) self.file_watcher = file_watcher self.root = root @@ -317,7 +337,9 @@ def _get_style(self): # Conditionally add 'completion-menu.completion.current' style completion_menu_current_style = [] if self.completion_menu_current_bg_color: - completion_menu_current_style.append(f"bg:{self.completion_menu_current_bg_color}") + completion_menu_current_style.append( + f"bg:{self.completion_menu_current_bg_color}" + ) if self.completion_menu_current_color: completion_menu_current_style.append(self.completion_menu_current_color) if completion_menu_current_style: @@ -385,7 +407,9 @@ def write_text(self, filename, content, max_retries=5, initial_delay=0.1): delay = initial_delay for attempt in range(max_retries): try: - with open(str(filename), "w", encoding=self.encoding, newline=self.newline) as f: + with open( + str(filename), "w", encoding=self.encoding, newline=self.newline + ) as f: f.write(content) return # Successfully wrote the file except PermissionError as err: @@ -403,7 +427,9 @@ def write_text(self, filename, content, max_retries=5, initial_delay=0.1): def rule(self): if self.pretty: - style = dict(style=self.user_input_color) if self.user_input_color else dict() + style = ( + dict(style=self.user_input_color) if self.user_input_color else dict() + ) self.console.rule(**style) else: print() @@ -491,7 +517,9 @@ def _(event): # In normal mode, Enter submits event.current_buffer.validate_and_handle() - @kb.add("escape", "enter", eager=True, filter=~is_searching) # This is Alt+Enter + @kb.add( + "escape", "enter", eager=True, filter=~is_searching + ) # This is Alt+Enter def _(event): "Handle Alt+Enter key press" if self.multiline_mode: @@ -745,11 +773,15 @@ def is_valid_response(text): res = default break res = res.lower() - good = any(valid_response.startswith(res) for valid_response in valid_responses) + good = any( + valid_response.startswith(res) for valid_response in valid_responses + ) if good: break - error_message = f"Please answer with one of: {', '.join(valid_responses)}" + error_message = ( + f"Please answer with one of: {', '.join(valid_responses)}" + ) self.tool_error(error_message) res = res.lower()[0] @@ -823,7 +855,9 @@ def _tool_message(self, message="", strip=True, color=None): if message.strip(): if "\n" in message: for line in message.splitlines(): - self.append_chat_history(line, linebreak=True, blockquote=True, strip=strip) + self.append_chat_history( + line, linebreak=True, blockquote=True, strip=strip + ) else: hist = message.strip() if strip else message self.append_chat_history(hist, linebreak=True, blockquote=True) @@ -847,7 +881,7 @@ def 
tool_error(self, message="", strip=True): def tool_warning(self, message="", strip=True): self._tool_message(message, strip, self.tool_warning_color) - def tool_output(self, *messages, log_only=False, bold=False): + def tool_output(self, *messages, log_only=False, bold=False, color=None): if messages: hist = " ".join(messages) hist = f"{hist.strip()}" @@ -859,7 +893,9 @@ def tool_output(self, *messages, log_only=False, bold=False): messages = list(map(Text, messages)) style = dict() if self.pretty: - if self.tool_output_color: + if color: + style["color"] = color + elif self.tool_output_color: style["color"] = self.tool_output_color style["reverse"] = bold @@ -919,10 +955,14 @@ def append_chat_history(self, text, linebreak=False, blockquote=False, strip=Tru text += "\n" if self.chat_history_file is not None: try: - with self.chat_history_file.open("a", encoding=self.encoding, errors="ignore") as f: + with self.chat_history_file.open( + "a", encoding=self.encoding, errors="ignore" + ) as f: f.write(text) except (PermissionError, OSError) as err: - print(f"Warning: Unable to write to chat history file {self.chat_history_file}.") + print( + f"Warning: Unable to write to chat history file {self.chat_history_file}." + ) print(err) self.chat_history_file = None # Disable further attempts to write @@ -944,7 +984,9 @@ def format_files_for_input(self, rel_fnames, rel_read_only_fnames): console = Console(file=output, force_terminal=False) read_only_files = sorted(rel_read_only_fnames or []) - editable_files = [f for f in sorted(rel_fnames) if f not in rel_read_only_fnames] + editable_files = [ + f for f in sorted(rel_fnames) if f not in rel_read_only_fnames + ] if read_only_files: # Use shorter of abs/rel paths for readonly files @@ -955,7 +997,9 @@ def format_files_for_input(self, rel_fnames, rel_read_only_fnames): files_with_label = ["Readonly:"] + ro_paths read_only_output = StringIO() - Console(file=read_only_output, force_terminal=False).print(Columns(files_with_label)) + Console(file=read_only_output, force_terminal=False).print( + Columns(files_with_label) + ) read_only_lines = read_only_output.getvalue().splitlines() console.print(Columns(files_with_label)) @@ -964,7 +1008,9 @@ def format_files_for_input(self, rel_fnames, rel_read_only_fnames): if read_only_files: files_with_label = ["Editable:"] + editable_files editable_output = StringIO() - Console(file=editable_output, force_terminal=False).print(Columns(files_with_label)) + Console(file=editable_output, force_terminal=False).print( + Columns(files_with_label) + ) editable_lines = editable_output.getvalue().splitlines() if len(read_only_lines) > 1 or len(editable_lines) > 1: diff --git a/aider/main.py b/aider/main.py index c9a9b8f64e3..29643937eee 100644 --- a/aider/main.py +++ b/aider/main.py @@ -880,7 +880,7 @@ def get_io(pretty): map_tokens = args.map_tokens try: - coder = Coder.create( + coder_kwargs = dict( main_model=main_model, edit_format=args.edit_format, io=io, @@ -912,6 +912,15 @@ def get_io(pretty): detect_urls=args.detect_urls, auto_copy_context=args.copy_paste, ) + + if args.moa: + # Parse space-separated model names and convert to Model objects + architect_model_names = args.moa + architect_models = [models.Model(m) for m in architect_model_names] + coder_kwargs["architect_models"] = architect_models + coder = Coder.create(**coder_kwargs) + else: + coder = Coder.create(**coder_kwargs) except UnknownEditFormat as err: io.tool_error(str(err)) io.offer_url(urls.edit_formats, "Open documentation about edit formats?") diff 
--git a/aider/models.py b/aider/models.py index 006a976b267..f2fc44b7e9c 100644 --- a/aider/models.py +++ b/aider/models.py @@ -194,7 +194,9 @@ def get_model_info(self, model): class Model(ModelSettings): - def __init__(self, model, weak_model=None, editor_model=None, editor_edit_format=None): + def __init__( + self, model, weak_model=None, editor_model=None, editor_edit_format=None + ): # Map any alias to its canonical name model = MODEL_ALIASES.get(model, model) @@ -265,7 +267,9 @@ def configure_model_settings(self, model): # Deep merge the extra_params dicts for key, value in self.extra_model_settings.extra_params.items(): - if isinstance(value, dict) and isinstance(self.extra_params.get(key), dict): + if isinstance(value, dict) and isinstance( + self.extra_params.get(key), dict + ): # For nested dicts, merge recursively self.extra_params[key] = {**self.extra_params[key], **value} else: @@ -500,14 +504,17 @@ def register_models(model_settings_fnames): for model_settings_dict in model_settings_list: model_settings = ModelSettings(**model_settings_dict) existing_model_settings = next( - (ms for ms in MODEL_SETTINGS if ms.name == model_settings.name), None + (ms for ms in MODEL_SETTINGS if ms.name == model_settings.name), + None, ) if existing_model_settings: MODEL_SETTINGS.remove(existing_model_settings) MODEL_SETTINGS.append(model_settings) except Exception as e: - raise Exception(f"Error loading model settings from {model_settings_fname}: {e}") + raise Exception( + f"Error loading model settings from {model_settings_fname}: {e}" + ) files_loaded.append(model_settings_fname) return files_loaded @@ -585,7 +592,9 @@ def sanity_check_model(io, model): elif not model.keys_in_environment: show = True - io.tool_warning(f"Warning for {model}: Unknown which environment variables are required.") + io.tool_warning( + f"Warning for {model}: Unknown which environment variables are required." 
+ ) if not model.info: show = True diff --git a/aider/resources/model-settings.yml b/aider/resources/model-settings.yml index 20c76a42c21..c0aef1e7302 100644 --- a/aider/resources/model-settings.yml +++ b/aider/resources/model-settings.yml @@ -630,6 +630,9 @@ use_temperature: false editor_model_name: gpt-4o editor_edit_format: editor-diff + extra_params: + extra_body: + reasoning_effort: high - name: o3-mini edit_format: diff @@ -638,4 +641,7 @@ use_temperature: false editor_model_name: gpt-4o editor_edit_format: editor-diff - \ No newline at end of file + extra_params: + extra_body: + reasoning_effort: high + diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index f8267761ab1..176c5098bf8 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -167,42 +167,72 @@ def main( 0, "--sleep", help="Sleep seconds between tests when single threaded" ), languages: str = typer.Option( - None, "--languages", "-l", help="Only run tests for specific languages (comma separated)" + None, + "--languages", + "-l", + help="Only run tests for specific languages (comma separated)", ), edit_format: str = typer.Option(None, "--edit-format", "-e", help="Edit format"), editor_model: str = typer.Option(None, "--editor-model", help="Editor model name"), - editor_edit_format: str = typer.Option(None, "--editor-edit-format", help="Editor edit format"), + editor_edit_format: str = typer.Option( + None, "--editor-edit-format", help="Editor edit format" + ), replay: str = typer.Option( None, "--replay", help="Replay previous .aider.chat.history.md responses from previous benchmark run", ), keywords: str = typer.Option( - None, "--keywords", "-k", help="Only run tests that contain keywords (comma sep)" + None, + "--keywords", + "-k", + help="Only run tests that contain keywords (comma sep)", ), clean: bool = typer.Option( - False, "--clean", "-c", help="Discard the existing testdir and make a clean copy" + False, + "--clean", + "-c", + help="Discard the existing testdir and make a clean copy", + ), + cont: bool = typer.Option( + False, "--cont", help="Continue the (single) matching testdir" + ), + make_new: bool = typer.Option( + False, "--new", "-n", help="Make a new dated testdir" + ), + no_unit_tests: bool = typer.Option( + False, "--no-unit-tests", help="Do not run unit tests" ), - cont: bool = typer.Option(False, "--cont", help="Continue the (single) matching testdir"), - make_new: bool = typer.Option(False, "--new", "-n", help="Make a new dated testdir"), - no_unit_tests: bool = typer.Option(False, "--no-unit-tests", help="Do not run unit tests"), no_aider: bool = typer.Option(False, "--no-aider", help="Do not run aider"), verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"), stats_only: bool = typer.Option( - False, "--stats", "-s", help="Do not run tests, just collect stats on completed tests" + False, + "--stats", + "-s", + help="Do not run tests, just collect stats on completed tests", ), stats_languages: str = typer.Option( None, "--stats-languages", help="Only include stats for specific languages (comma separated)", ), - diffs_only: bool = typer.Option(False, "--diffs", help="Just diff the provided stats dirs"), - tries: int = typer.Option(2, "--tries", "-r", help="Number of tries for running tests"), - threads: int = typer.Option(1, "--threads", "-t", help="Number of threads to run in parallel"), - num_tests: int = typer.Option(-1, "--num-tests", "-n", help="Number of tests to run"), + diffs_only: bool = typer.Option( + False, "--diffs", help="Just diff the 
provided stats dirs" + ), + tries: int = typer.Option( + 2, "--tries", "-r", help="Number of tries for running tests" + ), + threads: int = typer.Option( + 1, "--threads", "-t", help="Number of threads to run in parallel" + ), + num_tests: int = typer.Option( + -1, "--num-tests", "-n", help="Number of tests to run" + ), num_ctx: Optional[int] = typer.Option( None, "--num-ctx", help="Override model context window size" ), + moa: Optional[List[str]] = typer.Option( + None, "--moa", help="List of additional architect models" read_model_settings: str = typer.Option( None, "--read-model-settings", help="Load aider model settings from YAML file" ), @@ -244,7 +274,9 @@ def main( dirname = updated_dirnames[0] if "AIDER_DOCKER" not in os.environ: - print("Warning: benchmarking runs unvetted code from GPT, run in a docker container") + print( + "Warning: benchmarking runs unvetted code from GPT, run in a docker container" + ) return assert BENCHMARK_DNAME.exists() and BENCHMARK_DNAME.is_dir(), BENCHMARK_DNAME @@ -288,7 +320,10 @@ def get_exercise_dirs(base_dir, languages=None): dir_files = set(fn.name for fn in dirname.glob("*")) original_files = set(fn.name for fn in original_dname.glob("*")) if dir_files != original_files: - print("ERROR: will not delete dir that does not look like original tests", dirname) + print( + "ERROR: will not delete dir that does not look like original tests", + dirname, + ) return dest = dirname.parent / "OLD" / dirname.name @@ -332,7 +367,9 @@ def get_exercise_dirs(base_dir, languages=None): if keywords: keywords = keywords.split(",") - test_dnames = [dn for dn in test_dnames for keyword in keywords if keyword in dn] + test_dnames = [ + dn for dn in test_dnames for keyword in keywords if keyword in dn + ] random.shuffle(test_dnames) if num_tests > 0: @@ -361,6 +398,7 @@ def get_exercise_dirs(base_dir, languages=None): editor_edit_format, num_ctx, sleep, + moa, ) all_results.append(results) @@ -383,6 +421,9 @@ def get_exercise_dirs(base_dir, languages=None): replay, editor_model, editor_edit_format, + num_ctx, + sleep, + moa, ) all_results = run_test_threaded.gather(tqdm=True) @@ -437,7 +478,9 @@ def load_results(dirname, stats_languages=None): if stats_languages: languages = [lang.strip().lower() for lang in stats_languages.split(",")] - glob_patterns = [f"{lang}/exercises/practice/*/.aider.results.json" for lang in languages] + glob_patterns = [ + f"{lang}/exercises/practice/*/.aider.results.json" for lang in languages + ] else: glob_patterns = ["*/exercises/practice/*/.aider.results.json"] @@ -459,7 +502,9 @@ def summarize_results(dirname, stats_languages=None): res.total_tests = len(list(Path(dirname).glob("*/exercises/practice/*"))) try: - tries = max(len(results.get("tests_outcomes", [])) for results in all_results if results) + tries = max( + len(results.get("tests_outcomes", [])) for results in all_results if results + ) except ValueError: tries = 0 @@ -508,7 +553,9 @@ def summarize_results(dirname, stats_languages=None): res.syntax_errors += results.get("syntax_errors", 0) res.indentation_errors += results.get("indentation_errors", 0) - for key in "model edit_format commit_hash editor_model editor_edit_format".split(): + for ( + key + ) in "model edit_format commit_hash editor_model editor_edit_format".split(): val = results.get(key) if val: variants[key].add(val) @@ -629,7 +676,11 @@ def get_replayed_content(replay_dname, test_dname): return res res = res.splitlines(keepends=True) - res = [line for line in res if not line.startswith("> ") and not 
line.startswith("#### ")] + res = [ + line + for line in res + if not line.startswith("> ") and not line.startswith("#### ") + ] return "".join(res) @@ -662,6 +713,7 @@ def run_test_real( editor_edit_format, num_ctx=None, sleep=0, + moa=None, read_model_settings=None, ): if not os.path.isdir(testdir): @@ -781,10 +833,10 @@ def run_test_real( show_fnames = ",".join(map(str, fnames)) print("fnames:", show_fnames) - coder = Coder.create( - main_model, - edit_format, - io, + coder_kwargs = dict( + main_model=main_model, + edit_format=edit_format, + io=io, fnames=fnames, use_git=False, stream=False, @@ -794,6 +846,14 @@ def run_test_real( suggest_shell_commands=False, ignore_mentions=ignore_files, ) + + # Add architect_models if moa parameter provided + if moa: + # moa is already a list of models + architect_models = [models.Model(m) for m in moa] + coder_kwargs["architect_models"] = architect_models + + coder = Coder.create(**coder_kwargs) dump(coder.ignore_mentions) coder.show_announcements() @@ -860,13 +920,20 @@ def run_test_real( errors = errors.splitlines() syntax_errors += sum(1 for line in errors if line.startswith("SyntaxError")) - indentation_errors += sum(1 for line in errors if line.startswith("IndentationError")) + indentation_errors += sum( + 1 for line in errors if line.startswith("IndentationError") + ) print(errors[-1]) errors = "\n".join(errors) instructions = errors instructions += prompts.test_failures.format(file_list=file_list) + # For MOA Benchmark, add the MOA models to the model name + model_name = main_model.name + if moa: + model_name = f"{model_name}, {', '.join(moa)}" + # Clean up build directories after all attempts # Rust target/debug target_dir = testdir / "target" / "debug" @@ -904,7 +971,7 @@ def run_test_real( results = dict( testdir=str(testdir), testcase=testdir.name, - model=main_model.name, + model=model_name, edit_format=edit_format, tests_outcomes=test_outcomes, cost=coder.total_cost, @@ -927,7 +994,9 @@ def run_test_real( ) if edit_format == "architect": - results["editor_model"] = main_model.editor_model.name if main_model.editor_model else None + results["editor_model"] = ( + main_model.editor_model.name if main_model.editor_model else None + ) results["editor_edit_format"] = main_model.editor_edit_format dump(results) @@ -960,7 +1029,9 @@ def run_unit_tests(original_dname, testdir, history_fname, test_files): break if not command: - raise ValueError(f"No test command found for files with extensions: {extensions}") + raise ValueError( + f"No test command found for files with extensions: {extensions}" + ) # Copy test files from original directory for file_path in test_files: diff --git a/crypto_square.py b/crypto_square.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/house.py b/house.py new file mode 100644 index 00000000000..e9b4af12756 --- /dev/null +++ b/house.py @@ -0,0 +1,36 @@ +def recite(start_verse, end_verse): + # Define the components of the rhyme + subjects = [ + ("house that Jack built", ""), + ("malt", "lay in"), + ("rat", "ate"), + ("cat", "killed"), + ("dog", "worried"), + ("cow with the crumpled horn", "tossed"), + ("maiden all forlorn", "milked"), + ("man all tattered and torn", "kissed"), + ("priest all shaven and shorn", "married"), + ("rooster that crowed in the morn", "woke"), + ("farmer sowing his corn", "kept"), + ("horse and the hound and the horn", "belonged to") + ] + + def build_verse(verse_num): + """Recursively builds a single verse""" + if verse_num == 0: + return "the " + subjects[0][0] + "." 
+ + current_subject, current_action = subjects[verse_num] + return f"the {current_subject}\nthat {current_action} " + build_verse(verse_num - 1) + + def create_full_verse(verse_num): + """Creates a complete verse with the 'This is' prefix""" + return "This is " + build_verse(verse_num) + + # Input validation + if not (1 <= start_verse <= end_verse <= len(subjects)): + raise ValueError("Invalid verse numbers") + + # Generate requested verses + verses = [create_full_verse(i - 1) for i in range(start_verse, end_verse + 1)] + return "\n\n".join(verses) diff --git a/pascals_triangle.py b/pascals_triangle.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/satellite.py b/satellite.py new file mode 100644 index 00000000000..1e3ac328d07 --- /dev/null +++ b/satellite.py @@ -0,0 +1,59 @@ +class TreeNode: + def __init__(self, value): + self.value = value + self.left = None + self.right = None + +def validate_traversals(preorder, inorder): + """Validate the traversal inputs.""" + if len(preorder) != len(inorder): + raise ValueError("traversals must have the same length") + + # Convert to sets once for both uniqueness and equality checks + preorder_set = set(preorder) + inorder_set = set(inorder) + + if len(preorder) != len(preorder_set): + raise ValueError("traversals must contain unique items") + + if preorder_set != inorder_set: + raise ValueError("traversals must have the same elements") + +def build_tree_helper(preorder, inorder, pre_start, pre_end, in_start, in_end): + """Helper function that builds tree using index ranges instead of slicing.""" + if pre_start > pre_end or in_start > in_end: + return None + + # Root is always the first element of preorder section + root = TreeNode(preorder[pre_start]) + + # Find root in inorder traversal + root_idx = inorder.index(preorder[pre_start]) + left_size = root_idx - in_start + + # Recursively build left and right subtrees + root.left = build_tree_helper( + preorder, inorder, + pre_start + 1, pre_start + left_size, + in_start, root_idx - 1 + ) + + root.right = build_tree_helper( + preorder, inorder, + pre_start + left_size + 1, pre_end, + root_idx + 1, in_end + ) + + return root + +def tree_from_traversals(preorder, inorder): + """Reconstruct binary tree from its preorder and inorder traversals.""" + # Validate inputs first + validate_traversals(preorder, inorder) + + # Build the tree using index ranges + return build_tree_helper( + preorder, inorder, + 0, len(preorder) - 1, + 0, len(inorder) - 1 + ) diff --git a/word_count.py b/word_count.py new file mode 100644 index 00000000000..e69de29bb2d
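For reviewers who want to try this branch, here is a minimal usage sketch of the new mixture path. It is only a sketch assembled from the changes above (the `--moa` flag in `aider/args.py`/`aider/main.py` and the `architect_models` branch in `Coder.create`); the model names are placeholders taken from elsewhere in this diff, not recommendations.

```python
# Roughly what `aider --model gpt-4o --moa o3-mini` ends up doing (see aider/main.py above).
from aider import models
from aider.coders import Coder
from aider.io import InputOutput

io = InputOutput()
main_model = models.Model("gpt-4o")     # always becomes architect "alpha"
architects = [models.Model("o3-mini")]  # extra architects get NATO names: bravo, charlie, ...

# Passing architect_models makes Coder.create() pick the coder whose edit_format == "mixture".
coder = Coder.create(main_model=main_model, io=io, architect_models=architects)

# Plain input runs a discussion round; the new commands route explicitly:
#   /discuss <msg>  - one round of proposals from all active architects
#   /arbiter <msg>  - phased flow (brainstorm -> critique -> optimize) moderated by the arbiter
#   /code <msg>     - compile the proposals and hand them to the editor model
#   /ignore <name>  - drop an architect (e.g. /ignore bravo)
coder.run_one("/discuss add input validation to satellite.py", preproc=True)
```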