From 9031b06dcc64c3c2e8b833223daa12e01cbf033d Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 10 Nov 2025 23:13:16 +0900
Subject: [PATCH 1/4] Implement mcp server for translation module
---
mcp_server.py | 365 ++++++++++++++++++++++++++++++++++++++++++
translator/content.py | 29 ++--
2 files changed, 385 insertions(+), 9 deletions(-)
create mode 100644 mcp_server.py
diff --git a/mcp_server.py b/mcp_server.py
new file mode 100644
index 0000000..ec78ff9
--- /dev/null
+++ b/mcp_server.py
@@ -0,0 +1,365 @@
+import gradio as gr
+import os
+from dotenv import load_dotenv
+import json
+import re
+
+load_dotenv() # Load environment variables from .env file
+from translator.project_config import get_available_projects, get_project_config
+from translator.content import get_content, preprocess_content, get_full_prompt, llm_translate, fill_scaffold
+from translator.retriever import report
+import os
+from pathlib import Path
+
+def start_translate_handler_mcp(json_input_str):
+ file_to_translate = ""
+ project = ""
+ repo_url = ""
+ additional_instruction = ""
+ force_retranslate = False
+ request_data = {} # Initialize request_data for error context
+
+ try:
+ request_data_from_json = json.loads(json_input_str)
+
+ # Extract top-level fields for translation control
+ additional_instruction = request_data_from_json.get("additional_instruction", "")
+ force_retranslate = request_data_from_json.get("force_retranslate", False)
+ target_language = request_data_from_json.get("request", {}).get("target_language", "ko")
+ source_language = "en" # Assuming source language is always English for now
+
+ # Extract file details from the 'files' array (assuming the first file is the target)
+ files_list = request_data_from_json.get("files", [])
+ if not files_list:
+ raise ValueError("No files found in the JSON input for translation.")
+
+ selected_file_data = files_list[0]
+ docs_url = selected_file_data.get("repo_url") # This is the full blob URL
+ project = selected_file_data.get("metadata", {}).get("project")
+ docs_path = selected_file_data.get("metadata", {}).get("docs_path") # Extract docs_path
+
+ # Extract file_to_translate from docs_url
+ file_to_translate = ""
+ if "/blob/main/" in docs_url:
+ file_to_translate = docs_url.split("/blob/main/")[1]
+ elif "/blob/" in docs_url: # Handle other branches if necessary
+ parts = docs_url.split("/blob/")
+ if len(parts) > 1:
+ file_to_translate = parts[1].split("/", 1)[1] # Get path after branch name
+
+ # additional_instruction is extracted from the top-level, force_retranslate is also extracted.
+ # No need to re-initialize them here.
+
+ # Construct request_data for the output JSON, using extracted values
+ request_data = {
+ "project": project,
+ "target_language": target_language,
+ "source_language": source_language,
+ "files": [
+ {
+ "repo_url": docs_url, # Use docs_url here
+ "file_path": file_to_translate
+ }
+ ]
+ }
+
+ except json.JSONDecodeError as e:
+ error_message = f"❌ Invalid JSON input: {str(e)}"
+ return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value={"error": error_message})
+ except ValueError as e:
+ error_message = f"❌ Invalid JSON structure: {str(e)}"
+ return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value={"error": error_message})
+ except Exception as e:
+ error_message = f"❌ Error parsing JSON input: {str(e)}"
+ return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value={"error": error_message})
+
+ print(f"Received request: file={file_to_translate}, project={project}, repo_url={repo_url}, instruction={additional_instruction}, force_retranslate={force_retranslate}")
+
+ print(f"[DEBUG] Raw JSON input: {json_input_str}")
+ print(f"[DEBUG] Extracted file_to_translate: {file_to_translate}")
+
+ if not file_to_translate:
+ response = "❌ Please provide a file path to translate in the JSON input."
+ return gr.Textbox(value=f"Error: {response}"), gr.Markdown(value=""), gr.Json(value={"error": response})
+
+ if not project:
+ response = "❌ Please select a project in the JSON input."
+ return gr.Textbox(value=f"Error: {response}"), gr.Markdown(value=""), gr.Json(value={"error": response})
+
+ # Define paths for translated files dynamically
+ base_output_dir = Path("translation_result") / Path(docs_path) / target_language
+ base_output_dir.mkdir(parents=True, exist_ok=True)
+
+ # Construct the path for the translated file
+ # Example: docs/source/en/chat_response_parsing.md -> translation_result/docs/source/ko/chat_response_parsing.md
+ translated_file_name = Path(file_to_translate).name
+ translated_file_path = base_output_dir / translated_file_name
+ print(f"[DEBUG] Constructed translated_file_path: {translated_file_path}")
+ print(f"[DEBUG] Does translated_file_path exist? {translated_file_path.exists()}")
+
+ translated_doc = ""
+ response_message = ""
+ final_json_output = {} # Initialize here
+
+ try:
+ result_entry = {
+ "file_path": str(translated_file_path.relative_to(Path("translation_result"))),
+ "translated_content": "",
+ "status": "",
+ "metadata": {
+ "time_elapsed": 0.0, # Placeholder, actual implementation would measure this
+ "model_used": ""
+ }
+ }
+
+ if not force_retranslate and translated_file_path.exists():
+ # Reuse existing translation
+ with open(translated_file_path, "r", encoding="utf-8") as f:
+ translated_doc = f.read()
+ response_message = f"✅ Reused existing translation for {file_to_translate} (Project: {project})"
+
+ result_entry["translated_content"] = translated_doc
+ result_entry["status"] = "reused"
+ result_entry["metadata"]["model_used"] = "cached"
+
+ final_json_output = {
+ "type": "translation.output.response",
+ "request": request_data,
+ "results": [result_entry],
+ "error": None
+ }
+ else:
+ # 1. Get content - now passing docs_url
+ original_content = get_content(project, docs_url=docs_url)
+ print(f"[DEBUG] Original content length: {len(original_content)}")
+
+ # 2. Preprocess content
+ to_translate = preprocess_content(original_content)
+ print(f"[DEBUG] Preprocessed content length: {len(to_translate)}")
+
+ # 3. Get full prompt
+ full_prompt = get_full_prompt(target_language, to_translate, additional_instruction) # Use extracted target_language
+
+ # 4. Translate
+ cb, translated_content_raw = llm_translate(full_prompt)
+ print(f"LLM Callback: {cb}")
+ print(f"[DEBUG] Raw translated content length: {len(translated_content_raw)}")
+
+ # Determine model used for metadata
+ model_used = ""
+ if os.environ.get("ANTHROPIC_API_KEY"):
+ model_used = "claude-sonnet-4-20250514 (Anthropic API)"
+ elif os.environ.get("AWS_BEARER_TOKEN_BEDROCK"):
+ model_used = "claude-3-7-sonnet-20250219-v1 (AWS Bedrock)"
+
+ # 5. Fill scaffold
+ translated_doc = fill_scaffold(original_content, to_translate, translated_content_raw)
+
+ # 6. Save the new translation
+ with open(translated_file_path, "w", encoding="utf-8") as f:
+ f.write(translated_doc)
+
+ response_message = f"✅ Successfully translated and saved {file_to_translate} (Project: {project})"
+
+ result_entry["translated_content"] = translated_doc
+ result_entry["status"] = "success"
+ result_entry["metadata"]["model_used"] = model_used
+
+ final_json_output = {
+ "type": "translation.output.response",
+ "request": request_data,
+ "results": [result_entry],
+ "error": None
+ }
+ print(f"[DEBUG] Final translated_doc content:\n{translated_doc}")
+
+ # Create a display version of translated_doc for the Markdown component
+ # This version will have problematic custom syntax removed for better rendering.
+ display_translated_doc = translated_doc
+
+ # Remove XML-style comments for display
+    display_translated_doc = re.sub(r"<!--.*?-->", "", display_translated_doc, flags=re.DOTALL)
+
+    # Remove <Tip> and <Tip warning={true}> tags and their content for display
+    display_translated_doc = re.sub(r"<Tip>(.*?)</Tip>", "", display_translated_doc, flags=re.DOTALL)
+    display_translated_doc = re.sub(r"<Tip warning={true}>(.*?)</Tip>", "", display_translated_doc, flags=re.DOTALL)
+
+ return gr.Textbox(value=f"Translation Complete: {response_message}"), gr.Markdown(value=display_translated_doc), gr.Textbox(value=translated_doc), gr.Json(value=final_json_output)
+ except Exception as e:
+ error_message = f"Error during translation: {str(e)}"
+ # Ensure request_data is defined even in case of early errors for context
+ # If request_data was not successfully parsed, create a minimal one for error context
+ if not request_data:
+ request_data = {
+ "project": project if project else "unknown",
+ "target_language": "ko",
+ "source_language": "en",
+ "files": [
+ {
+ "repo_url": repo_url if repo_url else "unknown",
+ "file_path": file_to_translate if file_to_translate else "unknown"
+ }
+ ]
+ }
+ error_json_output = {
+ "type": "translation.output.response",
+ "request": request_data,
+ "results": [],
+ "error": error_message
+ }
+ return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Json(value=error_json_output)
+
+def update_status_mcp():
+ return gr.Textbox(value="Ready")
+
+def update_project_config_display(project):
+ """Update the project config display when project selection changes."""
+ if not project:
+ return ""
+
+ # Since project_config is no longer used for repo_url, we'll just display the project name.
+ config_html = f"""
+### 📋 Project Configuration: {project}
+
+- **Name:** {project}
+"""
+ return config_html
+
+def generate_json_request(project, docs_url, additional_instruction, force_retranslate):
+ # Extract file_path and docs_path from docs_url
+ file_to_translate = ""
+ docs_path_extracted = ""
+ if "/blob/main/" in docs_url:
+ parts = docs_url.split("/blob/main/")
+ if len(parts) > 1:
+ file_to_translate = parts[1]
+ # Assuming docs_path is the part before the language directory and file name
+ docs_path_parts = file_to_translate.split("/")
+ if len(docs_path_parts) > 2: # Ensure there are enough parts for docs/source/en/file.md
+ docs_path_extracted = "/".join(docs_path_parts[:-2]) # Exclude language and filename
+ elif len(docs_path_parts) > 1: # Fallback if only docs/source/file.md (no language dir)
+ docs_path_extracted = "/".join(docs_path_parts[:-1]) # Exclude filename
+ else:
+ docs_path_extracted = "" # No valid docs_path found
+ elif "/blob/" in docs_url: # Handle other branches if necessary
+ parts = docs_url.split("/blob/")
+ if len(parts) > 1:
+ path_after_blob = parts[1]
+ branch_and_filepath = path_after_blob.split("/", 1)
+ if len(branch_and_filepath) > 1:
+ file_to_translate = branch_and_filepath[1]
+ docs_path_parts = file_to_translate.split("/")
+ if len(docs_path_parts) > 2: # Ensure there are enough parts for docs/source/en/file.md
+ docs_path_extracted = "/".join(docs_path_parts[:-2]) # Exclude language and filename
+ elif len(docs_path_parts) > 1: # Fallback if only docs/source/file.md (no language dir)
+ docs_path_extracted = "/".join(docs_path_parts[:-1]) # Exclude filename
+ else:
+ docs_path_extracted = "" # No valid docs_path found
+
+ request_data = {
+ "files": [
+ {
+ "path": file_to_translate,
+ "repo_url": docs_url, # Use user-provided docs_url
+ "metadata": {
+ "project": project,
+ "docs_path": docs_path_extracted, # Include docs_path here
+ }
+ }
+ ],
+ "additional_instruction": additional_instruction,
+ "force_retranslate": force_retranslate,
+ "target_language": "ko", # Hardcoded target language for this server
+ "source_language": "en", # Hardcoded source language for this server
+ }
+ return json.dumps(request_data, indent=2)
+
+def create_mcp_interface():
+ with gr.Blocks(css="""
+ .markdown-scrollable {
+ overflow-y: auto;
+ }
+ """) as demo:
+ gr.Markdown("## Translation Module MCP Server")
+
+ status_display = gr.Textbox(label="Status", interactive=False, value="Idle")
+ start_translate_btn = gr.Button("Start Translation (MCP)", elem_classes="action-button")
+
+ with gr.TabItem("Translate Inputs", id=0):
+ project_dropdown = gr.Radio(
+ choices=get_available_projects(),
+ label="🎯 Select Project",
+ value="transformers",
+ )
+ project_config_display = gr.Markdown(value=update_project_config_display("transformers"))
+ docs_url_input = gr.Textbox(
+ label="🔗 Documentation URL (Full blob URL)",
+ value="https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerator_selection.md",
+ placeholder="e.g., https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerator_selection.md",
+ )
+ additional_instruction = gr.Textbox(
+ label="📝 Additional instructions (Optional)",
+ placeholder="Example: Translate 'model' as '모델' consistently",
+ lines=2,
+ )
+ force_retranslate = gr.Checkbox(
+ label="🔄 Force Retranslate",
+ value=False,
+ )
+
+ generate_json_btn = gr.Button("Generate JSON Request")
+ json_request_textbox = gr.Textbox(
+ label="JSON Request (for Translation)",
+ value="",
+ lines=10,
+ interactive=True,
+ )
+
+ with gr.Row():
+ translated_output = gr.Markdown(
+ label="Translated Content (Markdown)",
+ value="",
+ elem_classes="markdown-scrollable",
+ height=500, # Explicitly set height to enable scrolling
+ )
+ raw_text_output = gr.Textbox(
+ label="Translated Content (Raw Text)",
+ value="",
+ lines=20, # Give it a reasonable default height
+ interactive=False,
+ elem_classes="markdown-scrollable", # Reuse scrollable class
+ )
+ json_output = gr.Json(
+ label="Raw JSON Output",
+ value={},
+ )
+
+ # Update project config display when project selection changes
+ project_dropdown.change(
+ fn=update_project_config_display,
+ inputs=[project_dropdown],
+ outputs=[project_config_display],
+ )
+
+ # Connect generate_json_btn to generate_json_request function
+ generate_json_btn.click(
+ fn=generate_json_request,
+ inputs=[
+ project_dropdown,
+ docs_url_input,
+ additional_instruction,
+ force_retranslate
+ ],
+ outputs=[json_request_textbox],
+ )
+
+ start_translate_btn.click(
+ fn=start_translate_handler_mcp,
+ inputs=[json_request_textbox],
+ outputs=[status_display, translated_output, raw_text_output, json_output],
+ )
+ return demo
+
+if __name__ == "__main__":
+ demo = create_mcp_interface()
+ demo.launch()
diff --git a/translator/content.py b/translator/content.py
index cb7e2a3..e3c3dad 100644
--- a/translator/content.py
+++ b/translator/content.py
@@ -3,7 +3,7 @@
import string
import requests
-from langchain.callbacks import get_openai_callback
+
from langchain_anthropic import ChatAnthropic
import boto3
import json
@@ -12,21 +12,32 @@
from translator.project_config import get_project_config
-def get_content(filepath: str, project: str = "transformers") -> str:
- if filepath == "":
- raise ValueError("No files selected for translation.")
+def get_content(project: str = "transformers", docs_url: str | None = None) -> str:
+ if not docs_url:
+ raise ValueError("docs_url must be provided to get_content.")
- config = get_project_config(project)
- # Extract repo path from repo_url (e.g., "huggingface/transformers")
- repo_path = config.repo_url.replace("https://github.com/", "")
+ url = ""
+ print(f"[DEBUG] get_content received docs_url: {docs_url}")
+ if "/blob/" in docs_url:
+ # It's a full GitHub blob URL, convert to raw
+ url = docs_url.replace("https://github.com/", "https://raw.githubusercontent.com/").replace("/blob/", "/")
+ else:
+ # Assume it's a base GitHub repo URL, combine with filepath and main branch
+ # This case should ideally not be hit if docs_url is always a full blob URL
+ # but kept for robustness if the input format varies.
+ repo_owner_repo = docs_url.replace("https://github.com/", "")
+ # We need to extract the filepath from docs_url if it's not a full blob URL
+ # For now, raising an error if it's not a full blob URL to enforce input consistency.
+ raise ValueError("docs_url must be a full GitHub blob URL (e.g., containing /blob/).")
- url = f"https://raw.githubusercontent.com/{repo_path}/main/{filepath}"
+ print(f"[DEBUG] Constructed content URL: {url}")
+
response = requests.get(url)
if response.status_code == 200:
content = response.text
return content
else:
- raise ValueError("Failed to retrieve content from the URL.", url)
+ raise ValueError(f"Failed to retrieve content from the URL: {url}. Status code: {response.status_code}")
def preprocess_content(content: str) -> str:
From 4e61942c66e7301511a0194dcc014f56722596a6 Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 24 Nov 2025 01:14:02 -0800
Subject: [PATCH 2/4] Initial implementation of pr_generator MCP
---
mcp/__init__.py | 5 ++
mcp/pr_uploader.py | 167 +++++++++++++++++++++++++++++++++++++++
pr_generator/agent.py | 6 +-
pr_generator/searcher.py | 12 ++-
4 files changed, 184 insertions(+), 6 deletions(-)
create mode 100644 mcp/__init__.py
create mode 100644 mcp/pr_uploader.py
diff --git a/mcp/__init__.py b/mcp/__init__.py
new file mode 100644
index 0000000..a379474
--- /dev/null
+++ b/mcp/__init__.py
@@ -0,0 +1,5 @@
+"""Implementation of MCP server for Hugging Face i18n Agent"""
+
+from .pr_uploader import create_pr_agent_interface
+
+__all__ = ["create_pr_agent_interface"]
\ No newline at end of file
diff --git a/mcp/pr_uploader.py b/mcp/pr_uploader.py
new file mode 100644
index 0000000..4c18af7
--- /dev/null
+++ b/mcp/pr_uploader.py
@@ -0,0 +1,167 @@
+import gradio as gr
+import os
+import json
+import re
+from dotenv import load_dotenv
+
+load_dotenv() # Load environment variables from .env file
+
+from pr_generator.agent import GitHubPRAgent
+from pr_generator.searcher import find_reference_pr_simple_stream
+
+# Initialize GitHubPRAgent
+# These should be set as environment variables
+USER_OWNER = os.environ.get("GH_USER_OWNER", "your_github_username")
+USER_REPO = os.environ.get("GH_USER_REPO", "your_forked_repo_name")
+BASE_OWNER = os.environ.get("GH_BASE_OWNER", "huggingface")
+BASE_REPO = os.environ.get("GH_BASE_REPO", "transformers")
+
+pr_agent = GitHubPRAgent(
+ user_owner=USER_OWNER,
+ user_repo=USER_REPO,
+ base_owner=BASE_OWNER,
+ base_repo=BASE_REPO
+)
+
+def start_pr_generation_mcp(
+ reference_pr_url: str,
+ target_language: str,
+ filepath: str,
+ translated_doc_content: str,
+ base_branch: str = "main",
+):
+ # This function will call the GitHubPRAgent's workflow
+ # and return the results for display in Gradio.
+ # The actual implementation will involve calling pr_agent.run_translation_pr_workflow
+ # and handling its output.
+
+ # Placeholder for actual PR generation logic
+ print(f"Starting PR generation with:")
+ print(f" Reference PR URL: {reference_pr_url}")
+ print(f" Target Language: {target_language}")
+ print(f" Filepath: {filepath}")
+ print(f" Translated Content Length: {len(translated_doc_content)} bytes")
+ print(f" Base Branch: {base_branch}")
+
+ try:
+ result = pr_agent.run_translation_pr_workflow(
+ reference_pr_url=reference_pr_url,
+ target_language=target_language,
+ filepath=filepath,
+ translated_doc=translated_doc_content,
+ base_branch=base_branch,
+ )
+
+ if result["status"] == "success":
+ message = f"✅ PR created successfully: {result['pr_url']}"
+ return gr.Textbox(value=message), gr.Json(value=result)
+ elif result["status"] == "partial_success":
+ message = f"⚠️ Partial success: {result['message']}"
+ return gr.Textbox(value=message), gr.Json(value=result)
+ else:
+ message = f"❌ Error during PR generation: {result['message']}"
+ return gr.Textbox(value=message), gr.Json(value=result)
+
+ except Exception as e:
+ error_message = f"❌ Unexpected error during PR generation: {str(e)}"
+ return gr.Textbox(value=error_message), gr.Json(value={"error": error_message})
+
+def search_reference_pr_mcp(target_language: str, context: str):
+ # This function will call the searcher agent and return the best PR URL.
+ # It will also stream the progress messages.
+
+ search_generator = find_reference_pr_simple_stream(target_language=target_language, context=context)
+
+ # Collect all messages and the final result
+ messages = []
+ final_result = None
+ try:
+ while True:
+ message = next(search_generator)
+ messages.append(message)
+ print(message) # Print to console for real-time feedback
+ except StopIteration as e:
+ final_result = e.value
+
+ if final_result and final_result.get("status") == "success":
+ pr_url = final_result.get("result", "").replace("Recommended PR URL: ", "")
+ return gr.Textbox(value="\n".join(messages)), gr.Textbox(value=pr_url)
+ else:
+ error_message = final_result.get("result", "Unknown error during PR search.") if final_result else "No result from PR search."
+ return gr.Textbox(value="\n".join(messages) + f"\n❌ {error_message}"), gr.Textbox(value="")
+
+
+def create_pr_agent_interface():
+ with gr.Blocks(css="""
+ .markdown-scrollable {
+ overflow-y: auto;
+ }
+ """) as demo:
+ gr.Markdown("## PR Agent Module MCP Server")
+
+ pr_status_display = gr.Textbox(label="PR Generation Status", interactive=False, value="Idle")
+ start_pr_btn = gr.Button("Start PR Generation (MCP)", elem_classes="action-button")
+
+ with gr.TabItem("PR Generation Inputs", id=0):
+ gr.Markdown("### 🔍 Reference PR Search")
+ with gr.Row():
+ search_target_language = gr.Textbox(label="Target Language (for search)", value="korean")
+ search_context = gr.Textbox(label="Context (for search)", value="docs")
+ search_pr_btn = gr.Button("Search Reference PR")
+ search_output = gr.Textbox(label="Search Progress", interactive=False, lines=5)
+ recommended_pr_url = gr.Textbox(label="Recommended Reference PR URL", interactive=True)
+
+ gr.Markdown("### 📝 PR Generation Details")
+ reference_pr_url_input = gr.Textbox(
+ label="🔗 Reference PR URL",
+ value="https://github.com/huggingface/transformers/pull/24968",
+ placeholder="e.g., https://github.com/huggingface/transformers/pull/24968",
+ )
+ target_language_input = gr.Textbox(
+ label="🌐 Target Language",
+ value="ko",
+ placeholder="e.g., ko, ja, fr",
+ )
+ filepath_input = gr.Textbox(
+ label="📁 Original File Path (e.g., docs/source/en/accelerator_selection.md)",
+ value="docs/source/en/accelerator_selection.md",
+ placeholder="e.g., docs/source/en/accelerator_selection.md",
+ )
+ translated_doc_content_input = gr.Textbox(
+ label="📄 Translated Document Content",
+ value="# Translated Accelerator Selection\n\nThis is the translated content.",
+ lines=10,
+ interactive=True,
+ )
+ base_branch_input = gr.Textbox(
+ label="🌿 Base Branch (e.g., main)",
+ value="main",
+ )
+
+ pr_json_output = gr.Json(
+ label="PR Generation Raw JSON Output",
+ value={},
+ )
+
+ search_pr_btn.click(
+ fn=search_reference_pr_mcp,
+ inputs=[search_target_language, search_context],
+ outputs=[search_output, recommended_pr_url],
+ )
+
+ start_pr_btn.click(
+ fn=start_pr_generation_mcp,
+ inputs=[
+ reference_pr_url_input,
+ target_language_input,
+ filepath_input,
+ translated_doc_content_input,
+ base_branch_input,
+ ],
+ outputs=[pr_status_display, pr_json_output],
+ )
+ return demo
+
+if __name__ == "__main__":
+ demo = create_pr_agent_interface()
+ demo.launch()
diff --git a/pr_generator/agent.py b/pr_generator/agent.py
index 8e43d5c..098986e 100644
--- a/pr_generator/agent.py
+++ b/pr_generator/agent.py
@@ -8,6 +8,7 @@
import re
import json
from typing import Optional, Dict, List, Tuple, Any
+from github import Github
# Load environment variables from .env file
from dotenv import load_dotenv
@@ -21,16 +22,17 @@
# Library imports and error handling
try:
- from github import Github, GithubException
from github.GitRef import GitRef
from langchain_anthropic import ChatAnthropic
REQUIRED_LIBS_AVAILABLE = True
except ImportError as e:
- print(f"Required libraries are not installed: {e}")
+ print(f"DEBUG: ImportError in agent.py: {e}")
print("Please run: pip install PyGithub boto3 langchain-anthropic")
REQUIRED_LIBS_AVAILABLE = False
+print(f"DEBUG: REQUIRED_LIBS_AVAILABLE in agent.py: {REQUIRED_LIBS_AVAILABLE}")
+
class GitHubPRAgent:
"""Agent class for GitHub PR creation"""
diff --git a/pr_generator/searcher.py b/pr_generator/searcher.py
index 7ade3a4..aebfc0b 100644
--- a/pr_generator/searcher.py
+++ b/pr_generator/searcher.py
@@ -7,6 +7,8 @@
import re
import logging
from typing import List, Dict, Any, Optional
+from github import Github
+from langchain_core.tools import StructuredTool
# Load environment variables
from dotenv import load_dotenv
@@ -22,16 +24,18 @@
# Langchain imports
try:
from langchain_anthropic import ChatAnthropic
- from langchain.tools import StructuredTool
- from langchain.agents import AgentExecutor, create_tool_calling_agent
+ from langchain_classic.agents import AgentExecutor
+ from langchain.agents import create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
- from github import Github
REQUIRED_LIBS_AVAILABLE = True
except ImportError as e:
- print(f"Required libraries are not installed: {e}")
+ print(f"DEBUG: ImportError in searcher.py: {e}")
+ print("Please run: pip install PyGithub boto3 langchain-anthropic")
REQUIRED_LIBS_AVAILABLE = False
+print(f"DEBUG: REQUIRED_LIBS_AVAILABLE in searcher.py: {REQUIRED_LIBS_AVAILABLE}")
+
# Constants
ANTHROPIC_MODEL_ID = "claude-sonnet-4-20250514"
DEFAULT_TEMPERATURE = 0.0
From c487bad1b6e1e305b49d611b1ed32716358bd7cc Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 24 Nov 2025 04:07:24 -0800
Subject: [PATCH 3/4] Update pr generator module
---
mcp/pr_uploader.py | 106 +++++++++++++++++++++++++++++++--------
pr_generator/agent.py | 94 ++++++++++++++++++++++++++++++++--
pr_generator/searcher.py | 3 +-
3 files changed, 178 insertions(+), 25 deletions(-)
diff --git a/mcp/pr_uploader.py b/mcp/pr_uploader.py
index 4c18af7..cab7af3 100644
--- a/mcp/pr_uploader.py
+++ b/mcp/pr_uploader.py
@@ -27,45 +27,49 @@ def start_pr_generation_mcp(
reference_pr_url: str,
target_language: str,
filepath: str,
- translated_doc_content: str,
+ translated_filepath: str,
base_branch: str = "main",
+ preview_mode: bool = False,
):
# This function will call the GitHubPRAgent's workflow
# and return the results for display in Gradio.
- # The actual implementation will involve calling pr_agent.run_translation_pr_workflow
- # and handling its output.
- # Placeholder for actual PR generation logic
print(f"Starting PR generation with:")
print(f" Reference PR URL: {reference_pr_url}")
print(f" Target Language: {target_language}")
print(f" Filepath: {filepath}")
- print(f" Translated Content Length: {len(translated_doc_content)} bytes")
+ print(f" Translated Filepath: {translated_filepath}") # Pass the filepath directly
print(f" Base Branch: {base_branch}")
+ print(f" Preview Mode: {preview_mode}") # Log preview mode status
try:
result = pr_agent.run_translation_pr_workflow(
reference_pr_url=reference_pr_url,
target_language=target_language,
filepath=filepath,
- translated_doc=translated_doc_content,
+ translated_filepath=translated_filepath, # Pass the filepath directly
base_branch=base_branch,
+ preview_mode=preview_mode, # Pass preview_mode to the agent
)
- if result["status"] == "success":
+ if result["status"] == "preview":
+ message = "✨ PR Preview Generated Successfully!"
+ # Return preview data, and also enable the checkbox and button
+ return gr.Textbox(value=message), gr.Json(value=result["data"]), result["data"], gr.update(interactive=True), gr.update(interactive=True)
+ elif result["status"] == "success":
message = f"✅ PR created successfully: {result['pr_url']}"
- return gr.Textbox(value=message), gr.Json(value=result)
+ # On success, reset checkbox and button
+ return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
elif result["status"] == "partial_success":
message = f"⚠️ Partial success: {result['message']}"
- return gr.Textbox(value=message), gr.Json(value=result)
+ return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
else:
message = f"❌ Error during PR generation: {result['message']}"
- return gr.Textbox(value=message), gr.Json(value=result)
+ return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
except Exception as e:
error_message = f"❌ Unexpected error during PR generation: {str(e)}"
- return gr.Textbox(value=error_message), gr.Json(value={"error": error_message})
-
+ return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
def search_reference_pr_mcp(target_language: str, context: str):
# This function will call the searcher agent and return the best PR URL.
# It will also stream the progress messages.
@@ -90,6 +94,46 @@ def search_reference_pr_mcp(target_language: str, context: str):
error_message = final_result.get("result", "Unknown error during PR search.") if final_result else "No result from PR search."
return gr.Textbox(value="\n".join(messages) + f"\n❌ {error_message}"), gr.Textbox(value="")
+def handle_pr_confirmation_mcp(preview_data: dict, approved: bool):
+ if not approved:
+ message = "❌ PR creation cancelled by user."
+ return gr.Textbox(value=message), gr.Json(value=preview_data), gr.update(value=False, interactive=False), gr.update(interactive=False)
+
+ if not preview_data:
+ message = "❌ No preview data available to create PR."
+ return gr.Textbox(value=message), gr.Json(value={}), gr.update(value=False, interactive=False), gr.update(interactive=False)
+
+ try:
+ # Extract necessary parameters from preview_data
+ reference_pr_url = preview_data["reference_pr_url"]
+ target_language = preview_data["target_language"]
+ filepath = preview_data["filepath"]
+ translated_filepath = preview_data["target_filepath"] # Get the filepath
+ base_branch = preview_data["base_branch_for_pr"].split(":")[-1]
+
+ print(f"Executing PR creation for: {filepath} to {target_language}")
+ result = pr_agent.run_translation_pr_workflow(
+ reference_pr_url=reference_pr_url,
+ target_language=target_language,
+ filepath=filepath,
+ translated_filepath=translated_filepath, # Pass the filepath directly
+ base_branch=base_branch,
+ preview_mode=False, # Actual creation mode
+ )
+
+ if result["status"] == "success":
+ message = f"✅ PR created successfully: {result['pr_url']}"
+ return gr.Textbox(value=message), gr.Json(value=result), gr.update(value=False, interactive=False), gr.update(interactive=False)
+ elif result["status"] == "partial_success":
+ message = f"⚠️ Partial success: {result['message']}"
+ return gr.Textbox(value=message), gr.Json(value=result), gr.update(value=False, interactive=False), gr.update(interactive=False)
+ else:
+ message = f"❌ Error during PR creation: {result['message']}"
+ return gr.Textbox(value=message), gr.Json(value=result), gr.update(value=False, interactive=False), gr.update(interactive=False)
+
+ except Exception as e:
+ error_message = f"❌ Unexpected error during PR creation: {str(e)}"
+ return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}), gr.update(value=False, interactive=False), gr.update(interactive=False)
def create_pr_agent_interface():
with gr.Blocks(css="""
@@ -100,7 +144,8 @@ def create_pr_agent_interface():
gr.Markdown("## PR Agent Module MCP Server")
pr_status_display = gr.Textbox(label="PR Generation Status", interactive=False, value="Idle")
- start_pr_btn = gr.Button("Start PR Generation (MCP)", elem_classes="action-button")
+ with gr.Row(): # Use gr.Row to place buttons side-by-side
+ preview_pr_btn = gr.Button("Preview PR (JSON)", elem_classes="secondary-button") # New button for preview
with gr.TabItem("PR Generation Inputs", id=0):
gr.Markdown("### 🔍 Reference PR Search")
@@ -127,11 +172,12 @@ def create_pr_agent_interface():
value="docs/source/en/accelerator_selection.md",
placeholder="e.g., docs/source/en/accelerator_selection.md",
)
- translated_doc_content_input = gr.Textbox(
- label="📄 Translated Document Content",
- value="# Translated Accelerator Selection\n\nThis is the translated content.",
- lines=10,
+ translated_filepath_input = gr.Textbox(
+ label="📄 Translated Document File Path (e.g., path/to/translated_file.md)",
+ value="translation_result/docs/source/ko/accelerator_selection.md",
+ lines=1,
interactive=True,
+ placeholder="e.g., translation_result/docs/source/ko/accelerator_selection.md",
)
base_branch_input = gr.Textbox(
label="🌿 Base Branch (e.g., main)",
@@ -142,6 +188,14 @@ def create_pr_agent_interface():
label="PR Generation Raw JSON Output",
value={},
)
+
+ # New UI for human approval
+ with gr.Row():
+ confirmation_checkbox = gr.Checkbox(label="I approve this PR preview and wish to proceed with actual PR creation.", interactive=False)
+ confirm_pr_btn = gr.Button("Confirm & Create PR", elem_classes="action-button", interactive=False)
+
+ # Hidden state to store preview data
+ pr_preview_state = gr.State(value=None)
search_pr_btn.click(
fn=search_reference_pr_mcp,
@@ -149,16 +203,28 @@ def create_pr_agent_interface():
outputs=[search_output, recommended_pr_url],
)
- start_pr_btn.click(
+ preview_pr_btn.click( # Modified click event for preview button
fn=start_pr_generation_mcp,
inputs=[
reference_pr_url_input,
target_language_input,
filepath_input,
- translated_doc_content_input,
+ translated_filepath_input, # Changed to use the new filepath input
base_branch_input,
+ gr.State(True), # Pass True for preview_mode
],
- outputs=[pr_status_display, pr_json_output],
+ outputs=[pr_status_display, pr_json_output, pr_preview_state, confirmation_checkbox, confirm_pr_btn],
+ ).success(
+ fn=lambda x: [gr.update(interactive=True), gr.update(interactive=True)], # Enable checkbox and button
+            inputs=pr_preview_state, # required by the .success signature; the lambda ignores this value — .success merely fires after the preview completes
+ outputs=[confirmation_checkbox, confirm_pr_btn],
+ queue=False,
+ )
+
+ confirm_pr_btn.click( # New click event for confirm button
+ fn=lambda preview_data, approved: handle_pr_confirmation_mcp(preview_data, approved),
+ inputs=[pr_preview_state, confirmation_checkbox],
+ outputs=[pr_status_display, pr_json_output, confirmation_checkbox, confirm_pr_btn], # Reset checkbox and button state
)
return demo
diff --git a/pr_generator/agent.py b/pr_generator/agent.py
index 098986e..8924ca2 100644
--- a/pr_generator/agent.py
+++ b/pr_generator/agent.py
@@ -23,6 +23,7 @@
# Library imports and error handling
try:
from github.GitRef import GitRef
+ from github import GithubException
from langchain_anthropic import ChatAnthropic
REQUIRED_LIBS_AVAILABLE = True
@@ -418,23 +419,110 @@ def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str:
except Exception as e:
return f"Failed to retrieve branch information: {str(e)}"
+ def _prepare_pr_data(
+ self,
+ reference_pr_url: str,
+ target_language: str,
+ filepath: str,
+ translated_filepath: str, # Changed to accept filepath
+ base_branch: str = "main",
+ ) -> Dict[str, Any]:
+ """Prepare all data required for PR creation without making GitHub API calls."""
+ # 1. Analyze reference PR
+ pr_analysis = self.analyze_reference_pr(reference_pr_url)
+ if "error" in pr_analysis:
+ return {"status": "error", "message": pr_analysis["error"]}
+
+ # Read the translated document content from the provided file path
+ translated_doc_content = ""
+ if translated_filepath:
+            print(f"DEBUG (agent.py -> _prepare_pr_data): Attempting to read translated file from: {translated_filepath}") # temporary debug trace to diagnose FileNotFoundError on the translated-file path
+ try:
+ with open(translated_filepath, 'r', encoding='utf-8') as f:
+ translated_doc_content = f.read()
+ except FileNotFoundError:
+ return {"status": "error", "message": f"Translated file not found: {translated_filepath}"}
+ except Exception as e:
+ return {"status": "error", "message": f"Error reading translated file {translated_filepath}: {str(e)}"}
+
+ # 2. Generate translation file path and branch name
+ target_filepath = filepath.replace("/en/", f"/{target_language}/")
+ file_name = filepath.split("/")[-1]
+
+ branch_name = self.generate_branch_name_from_reference(
+ pr_analysis["head_branch"], target_language, file_name
+ )
+
+ # 3. Generate commit message
+ commit_messages = [commit["message"] for commit in pr_analysis["commits"]]
+ commit_message = self.generate_commit_message_from_reference(
+ commit_messages, target_language, file_name
+ )
+
+ # 4. Generate PR title and body
+ pr_title, pr_body = self.generate_pr_content_from_reference(
+ pr_analysis["title"],
+ pr_analysis["body"],
+ target_language,
+ filepath,
+ target_filepath,
+ file_name,
+ )
+
+ return {
+ "status": "preview_ready",
+ "reference_pr_url": reference_pr_url,
+ "target_language": target_language,
+ "filepath": filepath,
+ "branch_name": branch_name,
+ "commit_message": commit_message,
+ "target_filepath": target_filepath,
+ "pr_title": pr_title,
+ "pr_body": pr_body,
+ "head_branch_for_pr": f"{self.user_owner}:{branch_name}",
+ "base_branch_for_pr": f"{self.base_owner}:{base_branch}",
+ }
+
def run_translation_pr_workflow(
self,
reference_pr_url: str,
target_language: str,
filepath: str,
- translated_doc: str,
+ translated_filepath: str, # Changed to accept filepath
base_branch: str = "main",
+ preview_mode: bool = False,
) -> Dict[str, Any]:
"""Execute translation document PR creation workflow."""
try:
+ if preview_mode:
+ print("🚀 Running in preview mode...")
+ preview_data = self._prepare_pr_data(
+ reference_pr_url, target_language, filepath, translated_filepath, base_branch
+ )
+ if preview_data["status"] == "error":
+ return preview_data # Return error from preparation
+ return {"status": "preview", "data": preview_data}
+
+
+ # If not in preview mode, read the translated content from the file
+ translated_doc_content = ""
+ if translated_filepath:
+ try:
+ with open(translated_filepath, 'r', encoding='utf-8') as f:
+ translated_doc_content = f.read()
+ except FileNotFoundError:
+ error_message = f"❌ Translated file not found: {translated_filepath}"
+ return {"status": "error", "message": error_message, "error_details": error_message}
+ except Exception as e:
+ error_message = f"❌ Error reading translated file {translated_filepath}: {str(e)}"
+ return {"status": "error", "message": error_message, "error_details": error_message}
+
# 1. Analyze reference PR
print(f"🔍 Analyzing reference PR: {reference_pr_url}")
pr_analysis = self.analyze_reference_pr(reference_pr_url)
if "error" in pr_analysis:
return {"status": "error", "message": pr_analysis["error"]}
-
print("Reference PR analysis completed")
# 2. Generate translation file path and branch name
@@ -483,7 +571,7 @@ def run_translation_pr_workflow(
self.user_repo,
target_filepath,
commit_message,
- translated_doc,
+ translated_doc_content, # Pass the read content
branch_name,
)
diff --git a/pr_generator/searcher.py b/pr_generator/searcher.py
index aebfc0b..d517db3 100644
--- a/pr_generator/searcher.py
+++ b/pr_generator/searcher.py
@@ -24,8 +24,7 @@
# Langchain imports
try:
from langchain_anthropic import ChatAnthropic
- from langchain_classic.agents import AgentExecutor
- from langchain.agents import create_tool_calling_agent
+ from langchain.agents import create_tool_calling_agent, AgentExecutor # Explicitly import AgentExecutor
from langchain_core.prompts import ChatPromptTemplate
REQUIRED_LIBS_AVAILABLE = True
From 51cb24a29799dc6694a8143d5e3b070862d88140 Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 24 Nov 2025 04:08:01 -0800
Subject: [PATCH 4/4] Delete wrong files
---
.../docs/source/en/accelerator_selection.md | 127 ------------------
1 file changed, 127 deletions(-)
delete mode 100644 translation_result/docs/source/en/accelerator_selection.md
diff --git a/translation_result/docs/source/en/accelerator_selection.md b/translation_result/docs/source/en/accelerator_selection.md
deleted file mode 100644
index 58cbfa1..0000000
--- a/translation_result/docs/source/en/accelerator_selection.md
+++ /dev/null
@@ -1,127 +0,0 @@
-
-
-# 가속기 선택 [[accelerator-selection]]
-
-분산 학습 중에는 사용할 가속기(CUDA, XPU, MPS, HPU 등)의 수와 순서를 지정할 수 있습니다. 이는 서로 다른 컴퓨팅 성능을 가진 가속기가 있을 때 더 빠른 가속기를 먼저 사용하고 싶은 경우에 유용할 수 있습니다. 또는 사용 가능한 가속기의 일부만 사용할 수도 있습니다. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) 모두에서 작동합니다. Accelerate나 [DeepSpeed integration](./main_classes/deepspeed)는 필요하지 않습니다.
-
-이 가이드는 사용할 가속기의 수와 사용 순서를 선택하는 방법을 보여줍니다.
-
-## 가속기 수 [[number-of-accelerators]]
-
-예를 들어, 4개의 가속기가 있고 처음 2개만 사용하고 싶다면 아래 명령을 실행하세요.
-
-
-
-
-`--nproc_per_node`를 사용하여 사용할 가속기 수를 선택합니다.
-
-```bash
-torchrun --nproc_per_node=2 trainer-program.py ...
-```
-
-
-
-
-`--num_processes`를 사용하여 사용할 가속기 수를 선택합니다.
-
-```bash
-accelerate launch --num_processes 2 trainer-program.py ...
-```
-
-
-
-
-`--num_gpus`를 사용하여 사용할 GPU 수를 선택합니다.
-
-```bash
-deepspeed --num_gpus 2 trainer-program.py ...
-```
-
-
-
-
-## 가속기 순서 [[order-of-accelerators]]
-사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 종종 각 실행에 대해 명령줄에서 설정되지만, `~/.bashrc`나 다른 시작 구성 파일에 추가할 수도 있습니다.
-
-예를 들어, 4개의 가속기(0, 1, 2, 3)가 있고 가속기 0과 2만 실행하고 싶다면:
-
-
-
-
-```bash
-CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
-```
-
-GPU 0과 2만 PyTorch에서 "보이며" 각각 `cuda:0`과 `cuda:1`로 매핑됩니다.
-순서를 바꾸려면 (GPU 2를 `cuda:0`으로, GPU 0을 `cuda:1`로 사용):
-
-
-```bash
-CUDA_VISIBLE_DEVICES=2,0 torchrun trainer-program.py ...
-```
-
-GPU 없이 실행하려면:
-
-```bash
-CUDA_VISIBLE_DEVICES= python trainer-program.py ...
-```
-
-`CUDA_DEVICE_ORDER`를 사용하여 CUDA 장치의 순서를 제어할 수도 있습니다:
-
-- PCIe 버스 ID 순서 (`nvidia-smi`와 일치):
-
- ```bash
-$hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
- ```
-
-- 컴퓨팅 성능 순서 (가장 빠른 것부터):
-
- ```bash
- export CUDA_DEVICE_ORDER=FASTEST_FIRST
- ```
-
-
-
-
-```bash
-ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
-```
-
-XPU 0과 2만 PyTorch에서 "보이며" 각각 `xpu:0`과 `xpu:1`로 매핑됩니다.
-순서를 바꾸려면 (XPU 2를 `xpu:0`으로, XPU 0을 `xpu:1`로 사용):
-
-```bash
-ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ...
-```
-
-
-다음을 사용하여 Intel XPU의 순서를 제어할 수도 있습니다:
-
-```bash
-export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
-```
-
-Intel XPU에서의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) 문서를 참조하세요.
-
-
-
-
-
-
-> [!WARNING]
-> 환경 변수는 명령줄에 추가하는 대신 내보낼 수 있습니다. 환경 변수가 어떻게 설정되었는지 잊어버리고 잘못된 가속기를 사용하게 될 수 있어 혼란을 야기할 수 있으므로 권장하지 않습니다. 대신, 같은 명령줄에서 특정 훈련 실행을 위해 환경 변수를 설정하는 것이 일반적인 관례입니다.
-```
\ No newline at end of file