From 9031b06dcc64c3c2e8b833223daa12e01cbf033d Mon Sep 17 00:00:00 2001 From: wony617 <49024958+Jwaminju@users.noreply.github.com> Date: Mon, 10 Nov 2025 23:13:16 +0900 Subject: [PATCH 1/4] Implement mcp server for translation module --- mcp_server.py | 365 ++++++++++++++++++++++++++++++++++++++++++ translator/content.py | 29 ++-- 2 files changed, 385 insertions(+), 9 deletions(-) create mode 100644 mcp_server.py diff --git a/mcp_server.py b/mcp_server.py new file mode 100644 index 0000000..ec78ff9 --- /dev/null +++ b/mcp_server.py @@ -0,0 +1,365 @@ +import gradio as gr +import os +from dotenv import load_dotenv +import json +import re + +load_dotenv() # Load environment variables from .env file +from translator.project_config import get_available_projects, get_project_config +from translator.content import get_content, preprocess_content, get_full_prompt, llm_translate, fill_scaffold +from translator.retriever import report +import os +from pathlib import Path + +def start_translate_handler_mcp(json_input_str): + file_to_translate = "" + project = "" + repo_url = "" + additional_instruction = "" + force_retranslate = False + request_data = {} # Initialize request_data for error context + + try: + request_data_from_json = json.loads(json_input_str) + + # Extract top-level fields for translation control + additional_instruction = request_data_from_json.get("additional_instruction", "") + force_retranslate = request_data_from_json.get("force_retranslate", False) + target_language = request_data_from_json.get("request", {}).get("target_language", "ko") + source_language = "en" # Assuming source language is always English for now + + # Extract file details from the 'files' array (assuming the first file is the target) + files_list = request_data_from_json.get("files", []) + if not files_list: + raise ValueError("No files found in the JSON input for translation.") + + selected_file_data = files_list[0] + docs_url = selected_file_data.get("repo_url") # This is the full 
blob URL + project = selected_file_data.get("metadata", {}).get("project") + docs_path = selected_file_data.get("metadata", {}).get("docs_path") # Extract docs_path + + # Extract file_to_translate from docs_url + file_to_translate = "" + if "/blob/main/" in docs_url: + file_to_translate = docs_url.split("/blob/main/")[1] + elif "/blob/" in docs_url: # Handle other branches if necessary + parts = docs_url.split("/blob/") + if len(parts) > 1: + file_to_translate = parts[1].split("/", 1)[1] # Get path after branch name + + # additional_instruction is extracted from the top-level, force_retranslate is also extracted. + # No need to re-initialize them here. + + # Construct request_data for the output JSON, using extracted values + request_data = { + "project": project, + "target_language": target_language, + "source_language": source_language, + "files": [ + { + "repo_url": docs_url, # Use docs_url here + "file_path": file_to_translate + } + ] + } + + except json.JSONDecodeError as e: + error_message = f"❌ Invalid JSON input: {str(e)}" + return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value={"error": error_message}) + except ValueError as e: + error_message = f"❌ Invalid JSON structure: {str(e)}" + return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value={"error": error_message}) + except Exception as e: + error_message = f"❌ Error parsing JSON input: {str(e)}" + return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value={"error": error_message}) + + print(f"Received request: file={file_to_translate}, project={project}, repo_url={repo_url}, instruction={additional_instruction}, force_retranslate={force_retranslate}") + + print(f"[DEBUG] Raw JSON input: {json_input_str}") + print(f"[DEBUG] Extracted file_to_translate: {file_to_translate}") + + if not file_to_translate: + response = "❌ Please provide a file path to translate in the JSON input." 
+ return gr.Textbox(value=f"Error: {response}"), gr.Markdown(value=""), gr.Json(value={"error": response}) + + if not project: + response = "❌ Please select a project in the JSON input." + return gr.Textbox(value=f"Error: {response}"), gr.Markdown(value=""), gr.Json(value={"error": response}) + + # Define paths for translated files dynamically + base_output_dir = Path("translation_result") / Path(docs_path) / target_language + base_output_dir.mkdir(parents=True, exist_ok=True) + + # Construct the path for the translated file + # Example: docs/source/en/chat_response_parsing.md -> translation_result/docs/source/ko/chat_response_parsing.md + translated_file_name = Path(file_to_translate).name + translated_file_path = base_output_dir / translated_file_name + print(f"[DEBUG] Constructed translated_file_path: {translated_file_path}") + print(f"[DEBUG] Does translated_file_path exist? {translated_file_path.exists()}") + + translated_doc = "" + response_message = "" + final_json_output = {} # Initialize here + + try: + result_entry = { + "file_path": str(translated_file_path.relative_to(Path("translation_result"))), + "translated_content": "", + "status": "", + "metadata": { + "time_elapsed": 0.0, # Placeholder, actual implementation would measure this + "model_used": "" + } + } + + if not force_retranslate and translated_file_path.exists(): + # Reuse existing translation + with open(translated_file_path, "r", encoding="utf-8") as f: + translated_doc = f.read() + response_message = f"✅ Reused existing translation for {file_to_translate} (Project: {project})" + + result_entry["translated_content"] = translated_doc + result_entry["status"] = "reused" + result_entry["metadata"]["model_used"] = "cached" + + final_json_output = { + "type": "translation.output.response", + "request": request_data, + "results": [result_entry], + "error": None + } + else: + # 1. 
Get content - now passing docs_url + original_content = get_content(project, docs_url=docs_url) + print(f"[DEBUG] Original content length: {len(original_content)}") + + # 2. Preprocess content + to_translate = preprocess_content(original_content) + print(f"[DEBUG] Preprocessed content length: {len(to_translate)}") + + # 3. Get full prompt + full_prompt = get_full_prompt(target_language, to_translate, additional_instruction) # Use extracted target_language + + # 4. Translate + cb, translated_content_raw = llm_translate(full_prompt) + print(f"LLM Callback: {cb}") + print(f"[DEBUG] Raw translated content length: {len(translated_content_raw)}") + + # Determine model used for metadata + model_used = "" + if os.environ.get("ANTHROPIC_API_KEY"): + model_used = "claude-sonnet-4-20250514 (Anthropic API)" + elif os.environ.get("AWS_BEARER_TOKEN_BEDROCK"): + model_used = "claude-3-7-sonnet-20250219-v1 (AWS Bedrock)" + + # 5. Fill scaffold + translated_doc = fill_scaffold(original_content, to_translate, translated_content_raw) + + # 6. Save the new translation + with open(translated_file_path, "w", encoding="utf-8") as f: + f.write(translated_doc) + + response_message = f"✅ Successfully translated and saved {file_to_translate} (Project: {project})" + + result_entry["translated_content"] = translated_doc + result_entry["status"] = "success" + result_entry["metadata"]["model_used"] = model_used + + final_json_output = { + "type": "translation.output.response", + "request": request_data, + "results": [result_entry], + "error": None + } + print(f"[DEBUG] Final translated_doc content:\n{translated_doc}") + + # Create a display version of translated_doc for the Markdown component + # This version will have problematic custom syntax removed for better rendering. 
+ display_translated_doc = translated_doc + + # Remove XML-style comments for display + display_translated_doc = re.sub(r"", "", display_translated_doc, flags=re.DOTALL) + + # Remove and tags and their content for display + display_translated_doc = re.sub(r"(.*?)", "", display_translated_doc, flags=re.DOTALL) + display_translated_doc = re.sub(r"(.*?)", "", display_translated_doc, flags=re.DOTALL) + + return gr.Textbox(value=f"Translation Complete: {response_message}"), gr.Markdown(value=display_translated_doc), gr.Textbox(value=translated_doc), gr.Json(value=final_json_output) + except Exception as e: + error_message = f"Error during translation: {str(e)}" + # Ensure request_data is defined even in case of early errors for context + # If request_data was not successfully parsed, create a minimal one for error context + if not request_data: + request_data = { + "project": project if project else "unknown", + "target_language": "ko", + "source_language": "en", + "files": [ + { + "repo_url": repo_url if repo_url else "unknown", + "file_path": file_to_translate if file_to_translate else "unknown" + } + ] + } + error_json_output = { + "type": "translation.output.response", + "request": request_data, + "results": [], + "error": error_message + } + return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value=error_json_output) + +def update_status_mcp(): + return gr.Textbox(value="Ready") + +def update_project_config_display(project): + """Update the project config display when project selection changes.""" + if not project: + return "" + + # Since project_config is no longer used for repo_url, we'll just display the project name. 
+ config_html = f""" +### 📋 Project Configuration: {project} + +- **Name:** {project} +""" + return config_html + +def generate_json_request(project, docs_url, additional_instruction, force_retranslate): + # Extract file_path and docs_path from docs_url + file_to_translate = "" + docs_path_extracted = "" + if "/blob/main/" in docs_url: + parts = docs_url.split("/blob/main/") + if len(parts) > 1: + file_to_translate = parts[1] + # Assuming docs_path is the part before the language directory and file name + docs_path_parts = file_to_translate.split("/") + if len(docs_path_parts) > 2: # Ensure there are enough parts for docs/source/en/file.md + docs_path_extracted = "/".join(docs_path_parts[:-2]) # Exclude language and filename + elif len(docs_path_parts) > 1: # Fallback if only docs/source/file.md (no language dir) + docs_path_extracted = "/".join(docs_path_parts[:-1]) # Exclude filename + else: + docs_path_extracted = "" # No valid docs_path found + elif "/blob/" in docs_url: # Handle other branches if necessary + parts = docs_url.split("/blob/") + if len(parts) > 1: + path_after_blob = parts[1] + branch_and_filepath = path_after_blob.split("/", 1) + if len(branch_and_filepath) > 1: + file_to_translate = branch_and_filepath[1] + docs_path_parts = file_to_translate.split("/") + if len(docs_path_parts) > 2: # Ensure there are enough parts for docs/source/en/file.md + docs_path_extracted = "/".join(docs_path_parts[:-2]) # Exclude language and filename + elif len(docs_path_parts) > 1: # Fallback if only docs/source/file.md (no language dir) + docs_path_extracted = "/".join(docs_path_parts[:-1]) # Exclude filename + else: + docs_path_extracted = "" # No valid docs_path found + + request_data = { + "files": [ + { + "path": file_to_translate, + "repo_url": docs_url, # Use user-provided docs_url + "metadata": { + "project": project, + "docs_path": docs_path_extracted, # Include docs_path here + } + } + ], + "additional_instruction": additional_instruction, + 
"force_retranslate": force_retranslate, + "target_language": "ko", # Hardcoded target language for this server + "source_language": "en", # Hardcoded source language for this server + } + return json.dumps(request_data, indent=2) + +def create_mcp_interface(): + with gr.Blocks(css=""" + .markdown-scrollable { + overflow-y: auto; + } + """) as demo: + gr.Markdown("## Translation Module MCP Server") + + status_display = gr.Textbox(label="Status", interactive=False, value="Idle") + start_translate_btn = gr.Button("Start Translation (MCP)", elem_classes="action-button") + + with gr.TabItem("Translate Inputs", id=0): + project_dropdown = gr.Radio( + choices=get_available_projects(), + label="🎯 Select Project", + value="transformers", + ) + project_config_display = gr.Markdown(value=update_project_config_display("transformers")) + docs_url_input = gr.Textbox( + label="🔗 Documentation URL (Full blob URL)", + value="https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerator_selection.md", + placeholder="e.g., https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerator_selection.md", + ) + additional_instruction = gr.Textbox( + label="📝 Additional instructions (Optional)", + placeholder="Example: Translate 'model' as '모델' consistently", + lines=2, + ) + force_retranslate = gr.Checkbox( + label="🔄 Force Retranslate", + value=False, + ) + + generate_json_btn = gr.Button("Generate JSON Request") + json_request_textbox = gr.Textbox( + label="JSON Request (for Translation)", + value="", + lines=10, + interactive=True, + ) + + with gr.Row(): + translated_output = gr.Markdown( + label="Translated Content (Markdown)", + value="", + elem_classes="markdown-scrollable", + height=500, # Explicitly set height to enable scrolling + ) + raw_text_output = gr.Textbox( + label="Translated Content (Raw Text)", + value="", + lines=20, # Give it a reasonable default height + interactive=False, + elem_classes="markdown-scrollable", # Reuse scrollable 
class + ) + json_output = gr.Json( + label="Raw JSON Output", + value={}, + ) + + # Update project config display when project selection changes + project_dropdown.change( + fn=update_project_config_display, + inputs=[project_dropdown], + outputs=[project_config_display], + ) + + # Connect generate_json_btn to generate_json_request function + generate_json_btn.click( + fn=generate_json_request, + inputs=[ + project_dropdown, + docs_url_input, + additional_instruction, + force_retranslate + ], + outputs=[json_request_textbox], + ) + + start_translate_btn.click( + fn=start_translate_handler_mcp, + inputs=[json_request_textbox], + outputs=[status_display, translated_output, raw_text_output, json_output], + ) + return demo + +if __name__ == "__main__": + demo = create_mcp_interface() + demo.launch() diff --git a/translator/content.py b/translator/content.py index cb7e2a3..e3c3dad 100644 --- a/translator/content.py +++ b/translator/content.py @@ -3,7 +3,7 @@ import string import requests -from langchain.callbacks import get_openai_callback + from langchain_anthropic import ChatAnthropic import boto3 import json @@ -12,21 +12,32 @@ from translator.project_config import get_project_config -def get_content(filepath: str, project: str = "transformers") -> str: - if filepath == "": - raise ValueError("No files selected for translation.") +def get_content(project: str = "transformers", docs_url: str | None = None) -> str: + if not docs_url: + raise ValueError("docs_url must be provided to get_content.") - config = get_project_config(project) - # Extract repo path from repo_url (e.g., "huggingface/transformers") - repo_path = config.repo_url.replace("https://github.com/", "") + url = "" + print(f"[DEBUG] get_content received docs_url: {docs_url}") + if "/blob/" in docs_url: + # It's a full GitHub blob URL, convert to raw + url = docs_url.replace("https://github.com/", "https://raw.githubusercontent.com/").replace("/blob/", "/") + else: + # Assume it's a base GitHub repo URL, 
combine with filepath and main branch + # This case should ideally not be hit if docs_url is always a full blob URL + # but kept for robustness if the input format varies. + repo_owner_repo = docs_url.replace("https://github.com/", "") + # We need to extract the filepath from docs_url if it's not a full blob URL + # For now, raising an error if it's not a full blob URL to enforce input consistency. + raise ValueError("docs_url must be a full GitHub blob URL (e.g., containing /blob/).") - url = f"https://raw.githubusercontent.com/{repo_path}/main/{filepath}" + print(f"[DEBUG] Constructed content URL: {url}") + response = requests.get(url) if response.status_code == 200: content = response.text return content else: - raise ValueError("Failed to retrieve content from the URL.", url) + raise ValueError(f"Failed to retrieve content from the URL: {url}. Status code: {response.status_code}") def preprocess_content(content: str) -> str: From 4e61942c66e7301511a0194dcc014f56722596a6 Mon Sep 17 00:00:00 2001 From: wony617 <49024958+Jwaminju@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:14:02 -0800 Subject: [PATCH 2/4] Initial of pr_generator MCP --- mcp/__init__.py | 5 ++ mcp/pr_uploader.py | 167 +++++++++++++++++++++++++++++++++++++++ pr_generator/agent.py | 6 +- pr_generator/searcher.py | 12 ++- 4 files changed, 184 insertions(+), 6 deletions(-) create mode 100644 mcp/__init__.py create mode 100644 mcp/pr_uploader.py diff --git a/mcp/__init__.py b/mcp/__init__.py new file mode 100644 index 0000000..a379474 --- /dev/null +++ b/mcp/__init__.py @@ -0,0 +1,5 @@ +"""Implementation of MCP server for Hugging Face i18n Agent""" + +from .pr_uploader import create_pr_agent_interface + +__all__ = ["create_pr_agent_interface"] \ No newline at end of file diff --git a/mcp/pr_uploader.py b/mcp/pr_uploader.py new file mode 100644 index 0000000..4c18af7 --- /dev/null +++ b/mcp/pr_uploader.py @@ -0,0 +1,167 @@ +import gradio as gr +import os +import json +import re +from dotenv 
import load_dotenv + +load_dotenv() # Load environment variables from .env file + +from pr_generator.agent import GitHubPRAgent +from pr_generator.searcher import find_reference_pr_simple_stream + +# Initialize GitHubPRAgent +# These should be set as environment variables +USER_OWNER = os.environ.get("GH_USER_OWNER", "your_github_username") +USER_REPO = os.environ.get("GH_USER_REPO", "your_forked_repo_name") +BASE_OWNER = os.environ.get("GH_BASE_OWNER", "huggingface") +BASE_REPO = os.environ.get("GH_BASE_REPO", "transformers") + +pr_agent = GitHubPRAgent( + user_owner=USER_OWNER, + user_repo=USER_REPO, + base_owner=BASE_OWNER, + base_repo=BASE_REPO +) + +def start_pr_generation_mcp( + reference_pr_url: str, + target_language: str, + filepath: str, + translated_doc_content: str, + base_branch: str = "main", +): + # This function will call the GitHubPRAgent's workflow + # and return the results for display in Gradio. + # The actual implementation will involve calling pr_agent.run_translation_pr_workflow + # and handling its output. 
+ + # Placeholder for actual PR generation logic + print(f"Starting PR generation with:") + print(f" Reference PR URL: {reference_pr_url}") + print(f" Target Language: {target_language}") + print(f" Filepath: {filepath}") + print(f" Translated Content Length: {len(translated_doc_content)} bytes") + print(f" Base Branch: {base_branch}") + + try: + result = pr_agent.run_translation_pr_workflow( + reference_pr_url=reference_pr_url, + target_language=target_language, + filepath=filepath, + translated_doc=translated_doc_content, + base_branch=base_branch, + ) + + if result["status"] == "success": + message = f"✅ PR created successfully: {result['pr_url']}" + return gr.Textbox(value=message), gr.Json(value=result) + elif result["status"] == "partial_success": + message = f"⚠️ Partial success: {result['message']}" + return gr.Textbox(value=message), gr.Json(value=result) + else: + message = f"❌ Error during PR generation: {result['message']}" + return gr.Textbox(value=message), gr.Json(value=result) + + except Exception as e: + error_message = f"❌ Unexpected error during PR generation: {str(e)}" + return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}) + +def search_reference_pr_mcp(target_language: str, context: str): + # This function will call the searcher agent and return the best PR URL. + # It will also stream the progress messages. 
+ + search_generator = find_reference_pr_simple_stream(target_language=target_language, context=context) + + # Collect all messages and the final result + messages = [] + final_result = None + try: + while True: + message = next(search_generator) + messages.append(message) + print(message) # Print to console for real-time feedback + except StopIteration as e: + final_result = e.value + + if final_result and final_result.get("status") == "success": + pr_url = final_result.get("result", "").replace("Recommended PR URL: ", "") + return gr.Textbox(value="\n".join(messages)), gr.Textbox(value=pr_url) + else: + error_message = final_result.get("result", "Unknown error during PR search.") if final_result else "No result from PR search." + return gr.Textbox(value="\n".join(messages) + f"\n❌ {error_message}"), gr.Textbox(value="") + + +def create_pr_agent_interface(): + with gr.Blocks(css=""" + .markdown-scrollable { + overflow-y: auto; + } + """) as demo: + gr.Markdown("## PR Agent Module MCP Server") + + pr_status_display = gr.Textbox(label="PR Generation Status", interactive=False, value="Idle") + start_pr_btn = gr.Button("Start PR Generation (MCP)", elem_classes="action-button") + + with gr.TabItem("PR Generation Inputs", id=0): + gr.Markdown("### 🔍 Reference PR Search") + with gr.Row(): + search_target_language = gr.Textbox(label="Target Language (for search)", value="korean") + search_context = gr.Textbox(label="Context (for search)", value="docs") + search_pr_btn = gr.Button("Search Reference PR") + search_output = gr.Textbox(label="Search Progress", interactive=False, lines=5) + recommended_pr_url = gr.Textbox(label="Recommended Reference PR URL", interactive=True) + + gr.Markdown("### 📝 PR Generation Details") + reference_pr_url_input = gr.Textbox( + label="🔗 Reference PR URL", + value="https://github.com/huggingface/transformers/pull/24968", + placeholder="e.g., https://github.com/huggingface/transformers/pull/24968", + ) + target_language_input = gr.Textbox( + 
label="🌐 Target Language", + value="ko", + placeholder="e.g., ko, ja, fr", + ) + filepath_input = gr.Textbox( + label="📁 Original File Path (e.g., docs/source/en/accelerator_selection.md)", + value="docs/source/en/accelerator_selection.md", + placeholder="e.g., docs/source/en/accelerator_selection.md", + ) + translated_doc_content_input = gr.Textbox( + label="📄 Translated Document Content", + value="# Translated Accelerator Selection\n\nThis is the translated content.", + lines=10, + interactive=True, + ) + base_branch_input = gr.Textbox( + label="🌿 Base Branch (e.g., main)", + value="main", + ) + + pr_json_output = gr.Json( + label="PR Generation Raw JSON Output", + value={}, + ) + + search_pr_btn.click( + fn=search_reference_pr_mcp, + inputs=[search_target_language, search_context], + outputs=[search_output, recommended_pr_url], + ) + + start_pr_btn.click( + fn=start_pr_generation_mcp, + inputs=[ + reference_pr_url_input, + target_language_input, + filepath_input, + translated_doc_content_input, + base_branch_input, + ], + outputs=[pr_status_display, pr_json_output], + ) + return demo + +if __name__ == "__main__": + demo = create_pr_agent_interface() + demo.launch() diff --git a/pr_generator/agent.py b/pr_generator/agent.py index 8e43d5c..098986e 100644 --- a/pr_generator/agent.py +++ b/pr_generator/agent.py @@ -8,6 +8,7 @@ import re import json from typing import Optional, Dict, List, Tuple, Any +from github import Github # Load environment variables from .env file from dotenv import load_dotenv @@ -21,16 +22,17 @@ # Library imports and error handling try: - from github import Github, GithubException from github.GitRef import GitRef from langchain_anthropic import ChatAnthropic REQUIRED_LIBS_AVAILABLE = True except ImportError as e: - print(f"Required libraries are not installed: {e}") + print(f"DEBUG: ImportError in agent.py: {e}") print("Please run: pip install PyGithub boto3 langchain-anthropic") REQUIRED_LIBS_AVAILABLE = False +print(f"DEBUG: 
REQUIRED_LIBS_AVAILABLE in agent.py: {REQUIRED_LIBS_AVAILABLE}") + class GitHubPRAgent: """Agent class for GitHub PR creation""" diff --git a/pr_generator/searcher.py b/pr_generator/searcher.py index 7ade3a4..aebfc0b 100644 --- a/pr_generator/searcher.py +++ b/pr_generator/searcher.py @@ -7,6 +7,8 @@ import re import logging from typing import List, Dict, Any, Optional +from github import Github +from langchain_core.tools import StructuredTool # Load environment variables from dotenv import load_dotenv @@ -22,16 +24,18 @@ # Langchain imports try: from langchain_anthropic import ChatAnthropic - from langchain.tools import StructuredTool - from langchain.agents import AgentExecutor, create_tool_calling_agent + from langchain_classic.agents import AgentExecutor + from langchain.agents import create_tool_calling_agent from langchain_core.prompts import ChatPromptTemplate - from github import Github REQUIRED_LIBS_AVAILABLE = True except ImportError as e: - print(f"Required libraries are not installed: {e}") + print(f"DEBUG: ImportError in searcher.py: {e}") + print("Please run: pip install PyGithub boto3 langchain-anthropic") REQUIRED_LIBS_AVAILABLE = False +print(f"DEBUG: REQUIRED_LIBS_AVAILABLE in searcher.py: {REQUIRED_LIBS_AVAILABLE}") + # Constants ANTHROPIC_MODEL_ID = "claude-sonnet-4-20250514" DEFAULT_TEMPERATURE = 0.0 From c487bad1b6e1e305b49d611b1ed32716358bd7cc Mon Sep 17 00:00:00 2001 From: wony617 <49024958+Jwaminju@users.noreply.github.com> Date: Mon, 24 Nov 2025 04:07:24 -0800 Subject: [PATCH 3/4] Update pr generator module --- mcp/pr_uploader.py | 106 +++++++++++++++++++++++++++++++-------- pr_generator/agent.py | 94 ++++++++++++++++++++++++++++++++-- pr_generator/searcher.py | 3 +- 3 files changed, 178 insertions(+), 25 deletions(-) diff --git a/mcp/pr_uploader.py b/mcp/pr_uploader.py index 4c18af7..cab7af3 100644 --- a/mcp/pr_uploader.py +++ b/mcp/pr_uploader.py @@ -27,45 +27,49 @@ def start_pr_generation_mcp( reference_pr_url: str, target_language: 
str, filepath: str, - translated_doc_content: str, + translated_filepath: str, base_branch: str = "main", + preview_mode: bool = False, ): # This function will call the GitHubPRAgent's workflow # and return the results for display in Gradio. - # The actual implementation will involve calling pr_agent.run_translation_pr_workflow - # and handling its output. - # Placeholder for actual PR generation logic print(f"Starting PR generation with:") print(f" Reference PR URL: {reference_pr_url}") print(f" Target Language: {target_language}") print(f" Filepath: {filepath}") - print(f" Translated Content Length: {len(translated_doc_content)} bytes") + print(f" Translated Filepath: {translated_filepath}") # Pass the filepath directly print(f" Base Branch: {base_branch}") + print(f" Preview Mode: {preview_mode}") # Log preview mode status try: result = pr_agent.run_translation_pr_workflow( reference_pr_url=reference_pr_url, target_language=target_language, filepath=filepath, - translated_doc=translated_doc_content, + translated_filepath=translated_filepath, # Pass the filepath directly base_branch=base_branch, + preview_mode=preview_mode, # Pass preview_mode to the agent ) - if result["status"] == "success": + if result["status"] == "preview": + message = "✨ PR Preview Generated Successfully!" 
+ # Return preview data, and also enable the checkbox and button + return gr.Textbox(value=message), gr.Json(value=result["data"]), result["data"], gr.update(interactive=True), gr.update(interactive=True) + elif result["status"] == "success": message = f"✅ PR created successfully: {result['pr_url']}" - return gr.Textbox(value=message), gr.Json(value=result) + # On success, reset checkbox and button + return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False) elif result["status"] == "partial_success": message = f"⚠️ Partial success: {result['message']}" - return gr.Textbox(value=message), gr.Json(value=result) + return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False) else: message = f"❌ Error during PR generation: {result['message']}" - return gr.Textbox(value=message), gr.Json(value=result) + return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False) except Exception as e: error_message = f"❌ Unexpected error during PR generation: {str(e)}" - return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}) - + return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}), None, gr.update(value=False, interactive=False), gr.update(interactive=False) def search_reference_pr_mcp(target_language: str, context: str): # This function will call the searcher agent and return the best PR URL. # It will also stream the progress messages. @@ -90,6 +94,46 @@ def search_reference_pr_mcp(target_language: str, context: str): error_message = final_result.get("result", "Unknown error during PR search.") if final_result else "No result from PR search." 
return gr.Textbox(value="\n".join(messages) + f"\n❌ {error_message}"), gr.Textbox(value="") +def handle_pr_confirmation_mcp(preview_data: dict, approved: bool): + if not approved: + message = "❌ PR creation cancelled by user." + return gr.Textbox(value=message), gr.Json(value=preview_data), gr.update(value=False, interactive=False), gr.update(interactive=False) + + if not preview_data: + message = "❌ No preview data available to create PR." + return gr.Textbox(value=message), gr.Json(value={}), gr.update(value=False, interactive=False), gr.update(interactive=False) + + try: + # Extract necessary parameters from preview_data + reference_pr_url = preview_data["reference_pr_url"] + target_language = preview_data["target_language"] + filepath = preview_data["filepath"] + translated_filepath = preview_data["target_filepath"] # Get the filepath + base_branch = preview_data["base_branch_for_pr"].split(":")[-1] + + print(f"Executing PR creation for: {filepath} to {target_language}") + result = pr_agent.run_translation_pr_workflow( + reference_pr_url=reference_pr_url, + target_language=target_language, + filepath=filepath, + translated_filepath=translated_filepath, # Pass the filepath directly + base_branch=base_branch, + preview_mode=False, # Actual creation mode + ) + + if result["status"] == "success": + message = f"✅ PR created successfully: {result['pr_url']}" + return gr.Textbox(value=message), gr.Json(value=result), gr.update(value=False, interactive=False), gr.update(interactive=False) + elif result["status"] == "partial_success": + message = f"⚠️ Partial success: {result['message']}" + return gr.Textbox(value=message), gr.Json(value=result), gr.update(value=False, interactive=False), gr.update(interactive=False) + else: + message = f"❌ Error during PR creation: {result['message']}" + return gr.Textbox(value=message), gr.Json(value=result), gr.update(value=False, interactive=False), gr.update(interactive=False) + + except Exception as e: + error_message = f"❌ 
Unexpected error during PR creation: {str(e)}" + return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}), gr.update(value=False, interactive=False), gr.update(interactive=False) def create_pr_agent_interface(): with gr.Blocks(css=""" @@ -100,7 +144,8 @@ def create_pr_agent_interface(): gr.Markdown("## PR Agent Module MCP Server") pr_status_display = gr.Textbox(label="PR Generation Status", interactive=False, value="Idle") - start_pr_btn = gr.Button("Start PR Generation (MCP)", elem_classes="action-button") + with gr.Row(): # Use gr.Row to place buttons side-by-side + preview_pr_btn = gr.Button("Preview PR (JSON)", elem_classes="secondary-button") # New button for preview with gr.TabItem("PR Generation Inputs", id=0): gr.Markdown("### 🔍 Reference PR Search") @@ -127,11 +172,12 @@ def create_pr_agent_interface(): value="docs/source/en/accelerator_selection.md", placeholder="e.g., docs/source/en/accelerator_selection.md", ) - translated_doc_content_input = gr.Textbox( - label="📄 Translated Document Content", - value="# Translated Accelerator Selection\n\nThis is the translated content.", - lines=10, + translated_filepath_input = gr.Textbox( + label="📄 Translated Document File Path (e.g., path/to/translated_file.md)", + value="translation_result/docs/source/ko/accelerator_selection.md", + lines=1, interactive=True, + placeholder="e.g., translation_result/docs/source/ko/accelerator_selection.md", ) base_branch_input = gr.Textbox( label="🌿 Base Branch (e.g., main)", @@ -142,6 +188,14 @@ def create_pr_agent_interface(): label="PR Generation Raw JSON Output", value={}, ) + + # New UI for human approval + with gr.Row(): + confirmation_checkbox = gr.Checkbox(label="I approve this PR preview and wish to proceed with actual PR creation.", interactive=False) + confirm_pr_btn = gr.Button("Confirm & Create PR", elem_classes="action-button", interactive=False) + + # Hidden state to store preview data + pr_preview_state = gr.State(value=None) 
search_pr_btn.click( fn=search_reference_pr_mcp, @@ -149,16 +203,28 @@ def create_pr_agent_interface(): outputs=[search_output, recommended_pr_url], ) - start_pr_btn.click( + preview_pr_btn.click( # Modified click event for preview button fn=start_pr_generation_mcp, inputs=[ reference_pr_url_input, target_language_input, filepath_input, - translated_doc_content_input, + translated_filepath_input, # Changed to use the new filepath input base_branch_input, + gr.State(True), # Pass True for preview_mode ], - outputs=[pr_status_display, pr_json_output], + outputs=[pr_status_display, pr_json_output, pr_preview_state, confirmation_checkbox, confirm_pr_btn], + ).success( + fn=lambda x: [gr.update(interactive=True), gr.update(interactive=True)], # Enable checkbox and button + inputs=pr_preview_state, # Use output from start_pr_generation_mcp to trigger + outputs=[confirmation_checkbox, confirm_pr_btn], + queue=False, + ) + + confirm_pr_btn.click( # New click event for confirm button + fn=lambda preview_data, approved: handle_pr_confirmation_mcp(preview_data, approved), + inputs=[pr_preview_state, confirmation_checkbox], + outputs=[pr_status_display, pr_json_output, confirmation_checkbox, confirm_pr_btn], # Reset checkbox and button state ) return demo diff --git a/pr_generator/agent.py b/pr_generator/agent.py index 098986e..8924ca2 100644 --- a/pr_generator/agent.py +++ b/pr_generator/agent.py @@ -23,6 +23,7 @@ # Library imports and error handling try: from github.GitRef import GitRef + from github import GithubException from langchain_anthropic import ChatAnthropic REQUIRED_LIBS_AVAILABLE = True @@ -418,23 +419,110 @@ def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str: except Exception as e: return f"Failed to retrieve branch information: {str(e)}" + def _prepare_pr_data( + self, + reference_pr_url: str, + target_language: str, + filepath: str, + translated_filepath: str, # Changed to accept filepath + base_branch: str = "main", + ) -> 
Dict[str, Any]: + """Prepare all data required for PR creation without making GitHub API calls.""" + # 1. Analyze reference PR + pr_analysis = self.analyze_reference_pr(reference_pr_url) + if "error" in pr_analysis: + return {"status": "error", "message": pr_analysis["error"]} + + # Read the translated document content from the provided file path + translated_doc_content = "" + if translated_filepath: + print(f"DEBUG (agent.py -> _prepare_pr_data): Attempting to read translated file from: {translated_filepath}") # Added logging + try: + with open(translated_filepath, 'r', encoding='utf-8') as f: + translated_doc_content = f.read() + except FileNotFoundError: + return {"status": "error", "message": f"Translated file not found: {translated_filepath}"} + except Exception as e: + return {"status": "error", "message": f"Error reading translated file {translated_filepath}: {str(e)}"} + + # 2. Generate translation file path and branch name + target_filepath = filepath.replace("/en/", f"/{target_language}/") + file_name = filepath.split("/")[-1] + + branch_name = self.generate_branch_name_from_reference( + pr_analysis["head_branch"], target_language, file_name + ) + + # 3. Generate commit message + commit_messages = [commit["message"] for commit in pr_analysis["commits"]] + commit_message = self.generate_commit_message_from_reference( + commit_messages, target_language, file_name + ) + + # 4. 
Generate PR title and body + pr_title, pr_body = self.generate_pr_content_from_reference( + pr_analysis["title"], + pr_analysis["body"], + target_language, + filepath, + target_filepath, + file_name, + ) + + return { + "status": "preview_ready", + "reference_pr_url": reference_pr_url, + "target_language": target_language, + "filepath": filepath, + "branch_name": branch_name, + "commit_message": commit_message, + "target_filepath": target_filepath, + "pr_title": pr_title, + "pr_body": pr_body, + "head_branch_for_pr": f"{self.user_owner}:{branch_name}", + "base_branch_for_pr": f"{self.base_owner}:{base_branch}", + } + def run_translation_pr_workflow( self, reference_pr_url: str, target_language: str, filepath: str, - translated_doc: str, + translated_filepath: str, # Changed to accept filepath base_branch: str = "main", + preview_mode: bool = False, ) -> Dict[str, Any]: """Execute translation document PR creation workflow.""" try: + if preview_mode: + print("🚀 Running in preview mode...") + preview_data = self._prepare_pr_data( + reference_pr_url, target_language, filepath, translated_filepath, base_branch + ) + if preview_data["status"] == "error": + return preview_data # Return error from preparation + return {"status": "preview", "data": preview_data} + + + # If not in preview mode, read the translated content from the file + translated_doc_content = "" + if translated_filepath: + try: + with open(translated_filepath, 'r', encoding='utf-8') as f: + translated_doc_content = f.read() + except FileNotFoundError: + error_message = f"❌ Translated file not found: {translated_filepath}" + return {"status": "error", "message": error_message, "error_details": error_message} + except Exception as e: + error_message = f"❌ Error reading translated file {translated_filepath}: {str(e)}" + return {"status": "error", "message": error_message, "error_details": error_message} + # 1. 
Analyze reference PR print(f"🔍 Analyzing reference PR: {reference_pr_url}") pr_analysis = self.analyze_reference_pr(reference_pr_url) if "error" in pr_analysis: return {"status": "error", "message": pr_analysis["error"]} - print("Reference PR analysis completed") # 2. Generate translation file path and branch name @@ -483,7 +571,7 @@ def run_translation_pr_workflow( self.user_repo, target_filepath, commit_message, - translated_doc, + translated_doc_content, # Pass the read content branch_name, ) diff --git a/pr_generator/searcher.py b/pr_generator/searcher.py index aebfc0b..d517db3 100644 --- a/pr_generator/searcher.py +++ b/pr_generator/searcher.py @@ -24,8 +24,7 @@ # Langchain imports try: from langchain_anthropic import ChatAnthropic - from langchain_classic.agents import AgentExecutor - from langchain.agents import create_tool_calling_agent + from langchain.agents import create_tool_calling_agent, AgentExecutor # Explicitly import AgentExecutor from langchain_core.prompts import ChatPromptTemplate REQUIRED_LIBS_AVAILABLE = True From 51cb24a29799dc6694a8143d5e3b070862d88140 Mon Sep 17 00:00:00 2001 From: wony617 <49024958+Jwaminju@users.noreply.github.com> Date: Mon, 24 Nov 2025 04:08:01 -0800 Subject: [PATCH 4/4] Delete wrong files --- .../docs/source/en/accelerator_selection.md | 127 ------------------ 1 file changed, 127 deletions(-) delete mode 100644 translation_result/docs/source/en/accelerator_selection.md diff --git a/translation_result/docs/source/en/accelerator_selection.md b/translation_result/docs/source/en/accelerator_selection.md deleted file mode 100644 index 58cbfa1..0000000 --- a/translation_result/docs/source/en/accelerator_selection.md +++ /dev/null @@ -1,127 +0,0 @@ - - -# 가속기 선택 [[accelerator-selection]] - -분산 학습 중에는 사용할 가속기(CUDA, XPU, MPS, HPU 등)의 수와 순서를 지정할 수 있습니다. 이는 서로 다른 컴퓨팅 성능을 가진 가속기가 있을 때 더 빠른 가속기를 먼저 사용하고 싶은 경우에 유용할 수 있습니다. 또는 사용 가능한 가속기의 일부만 사용할 수도 있습니다. 
선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) 모두에서 작동합니다. Accelerate나 [DeepSpeed integration](./main_classes/deepspeed)는 필요하지 않습니다. - -이 가이드는 사용할 가속기의 수와 사용 순서를 선택하는 방법을 보여줍니다. - -## 가속기 수 [[number-of-accelerators]] - -예를 들어, 4개의 가속기가 있고 처음 2개만 사용하고 싶다면 아래 명령을 실행하세요. - - - - -`--nproc_per_node`를 사용하여 사용할 가속기 수를 선택합니다. - -```bash -torchrun --nproc_per_node=2 trainer-program.py ... -``` - - - - -`--num_processes`를 사용하여 사용할 가속기 수를 선택합니다. - -```bash -accelerate launch --num_processes 2 trainer-program.py ... -``` - - - - -`--num_gpus`를 사용하여 사용할 GPU 수를 선택합니다. - -```bash -deepspeed --num_gpus 2 trainer-program.py ... -``` - - - - -## 가속기 순서 [[order-of-accelerators]] -사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 종종 각 실행에 대해 명령줄에서 설정되지만, `~/.bashrc`나 다른 시작 구성 파일에 추가할 수도 있습니다. - -예를 들어, 4개의 가속기(0, 1, 2, 3)가 있고 가속기 0과 2만 실행하고 싶다면: - - - - -```bash -CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ... -``` - -GPU 0과 2만 PyTorch에서 "보이며" 각각 `cuda:0`과 `cuda:1`로 매핑됩니다. -순서를 바꾸려면 (GPU 2를 `cuda:0`으로, GPU 0을 `cuda:1`로 사용): - - -```bash -CUDA_VISIBLE_DEVICES=2,0 torchrun trainer-program.py ... -``` - -GPU 없이 실행하려면: - -```bash -CUDA_VISIBLE_DEVICES= python trainer-program.py ... -``` - -`CUDA_DEVICE_ORDER`를 사용하여 CUDA 장치의 순서를 제어할 수도 있습니다: - -- PCIe 버스 ID 순서 (`nvidia-smi`와 일치): - - ```bash -$hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID - ``` - -- 컴퓨팅 성능 순서 (가장 빠른 것부터): - - ```bash - export CUDA_DEVICE_ORDER=FASTEST_FIRST - ``` - - - - -```bash -ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ... -``` - -XPU 0과 2만 PyTorch에서 "보이며" 각각 `xpu:0`과 `xpu:1`로 매핑됩니다. -순서를 바꾸려면 (XPU 2를 `xpu:0`으로, XPU 0을 `xpu:1`로 사용): - -```bash -ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ... 
-``` - - -다음을 사용하여 Intel XPU의 순서를 제어할 수도 있습니다: - -```bash -export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1 -``` - -Intel XPU에서의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) 문서를 참조하세요. - - - - - - -> [!WARNING] -> 환경 변수는 명령줄에 추가하는 대신 내보낼 수 있습니다. 환경 변수가 어떻게 설정되었는지 잊어버리고 잘못된 가속기를 사용하게 될 수 있어 혼란을 야기할 수 있으므로 권장하지 않습니다. 대신, 같은 명령줄에서 특정 훈련 실행을 위해 환경 변수를 설정하는 것이 일반적인 관례입니다. -``` \ No newline at end of file