From 9031b06dcc64c3c2e8b833223daa12e01cbf033d Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 10 Nov 2025 23:13:16 +0900
Subject: [PATCH 1/4] Implement mcp server for translation module

---
 mcp_server.py         | 365 ++++++++++++++++++++++++++++++++++++++++++
 translator/content.py |  29 ++--
 2 files changed, 385 insertions(+), 9 deletions(-)
 create mode 100644 mcp_server.py

diff --git a/mcp_server.py b/mcp_server.py
new file mode 100644
index 0000000..ec78ff9
--- /dev/null
+++ b/mcp_server.py
@@ -0,0 +1,365 @@
+import gradio as gr
+import os
+from dotenv import load_dotenv
+import json
+import re
+
+load_dotenv() # Load environment variables from .env file
+from translator.project_config import get_available_projects, get_project_config
+from translator.content import get_content, preprocess_content, get_full_prompt, llm_translate, fill_scaffold
+from translator.retriever import report
+import os
+from pathlib import Path
+
+def start_translate_handler_mcp(json_input_str):
+    """Translate the first file named in an MCP JSON request and save the result.
+
+    Args: json_input_str -- JSON text with a "files" list (first entry: "repo_url" blob URL and
+        "metadata" holding "project"/"docs_path"), "additional_instruction", "force_retranslate".
+    Returns: (status Textbox, Markdown preview, raw Textbox, Json payload) on every path, errors included.
+    """
+
+    try:
+        request_data_from_json = json.loads(json_input_str)
+
+        # Extract control fields; target_language may sit under "request" or at the top level of the JSON
+        additional_instruction = request_data_from_json.get("additional_instruction", "")
+        force_retranslate = request_data_from_json.get("force_retranslate", False)
+        target_language = request_data_from_json.get("request", {}).get("target_language") or request_data_from_json.get("target_language", "ko")
+        source_language = "en" # Assuming source language is always English for now
+
+        # Extract file details from the 'files' array (assuming the first file is the target)
+        files_list = request_data_from_json.get("files", [])
+        if not files_list:
+            raise ValueError("No files found in the JSON input for translation.")
+
+        selected_file_data = files_list[0]
+        docs_url = repo_url = selected_file_data.get("repo_url") or "" # Full blob URL; "" when missing so the checks below fail cleanly
+        project = selected_file_data.get("metadata", {}).get("project")
+        docs_path = selected_file_data.get("metadata", {}).get("docs_path") or "" # "" when missing: Path(None) would raise later
+
+        # Extract file_to_translate from docs_url
+        file_to_translate = ""
+        if "/blob/main/" in docs_url:
+            file_to_translate = docs_url.split("/blob/main/")[1]
+        elif "/blob/" in docs_url: # Handle other branches if necessary
+            parts = docs_url.split("/blob/")
+            if len(parts) > 1:
+                file_to_translate = parts[1].partition("/")[2] # Path after the branch name; "" if there is none
+
+        # additional_instruction is extracted from the top-level, force_retranslate is also extracted.
+        # No need to re-initialize them here.
+
+        # Construct request_data for the output JSON, using extracted values
+        request_data = {
+            "project": project,
+            "target_language": target_language,
+            "source_language": source_language,
+            "files": [
+                {
+                    "repo_url": docs_url, # Use docs_url here
+                    "file_path": file_to_translate
+                }
+            ]
+        }
+
+    except json.JSONDecodeError as e:
+        error_message = f"❌ Invalid JSON input: {str(e)}"
+        return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value={"error": error_message})
+    except ValueError as e:
+        error_message = f"❌ Invalid JSON structure: {str(e)}"
+        return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value={"error": error_message})
+    except Exception as e:
+        error_message = f"❌ Error parsing JSON input: {str(e)}"
+        return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value={"error": error_message})
+
+    print(f"Received request: file={file_to_translate}, project={project}, repo_url={repo_url}, instruction={additional_instruction}, force_retranslate={force_retranslate}")
+
+    print(f"[DEBUG] Raw JSON input: {json_input_str}")
+    print(f"[DEBUG] Extracted file_to_translate: {file_to_translate}")
+
+    if not file_to_translate:
+        response = "❌ Please provide a file path to translate in the JSON input."
+        return gr.Textbox(value=f"Error: {response}"), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value={"error": response})
+
+    if not project:
+        response = "❌ Please select a project in the JSON input."
+        return gr.Textbox(value=f"Error: {response}"), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value={"error": response})
+
+    # docs_path/target_language come from the request: refuse values that would escape translation_result/
+    relative_output_dir = Path(docs_path) / target_language
+    if relative_output_dir.is_absolute() or ".." in relative_output_dir.parts:
+        return gr.Textbox(value="Error: ❌ Invalid docs_path or target_language in the JSON input."), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value={"error": "❌ Invalid docs_path or target_language in the JSON input."})
+    base_output_dir = Path("translation_result") / relative_output_dir # e.g. translation_result/docs/source/ko
+    base_output_dir.mkdir(parents=True, exist_ok=True)
+    translated_file_name = Path(file_to_translate).name
+    translated_file_path = base_output_dir / translated_file_name
+    print(f"[DEBUG] Constructed translated_file_path: {translated_file_path}")
+    print(f"[DEBUG] Does translated_file_path exist? {translated_file_path.exists()}")
+
+    translated_doc = ""
+    response_message = ""
+    final_json_output = {} # Initialize here
+
+    try:
+        result_entry = {
+            "file_path": str(translated_file_path.relative_to(Path("translation_result"))),
+            "translated_content": "",
+            "status": "",
+            "metadata": {
+                "time_elapsed": 0.0, # Placeholder, actual implementation would measure this
+                "model_used": ""
+            }
+        }
+
+        if not force_retranslate and translated_file_path.exists():
+            # Reuse existing translation
+            with open(translated_file_path, "r", encoding="utf-8") as f:
+                translated_doc = f.read()
+            response_message = f"✅ Reused existing translation for {file_to_translate} (Project: {project})"
+
+            result_entry["translated_content"] = translated_doc
+            result_entry["status"] = "reused"
+            result_entry["metadata"]["model_used"] = "cached"
+
+            final_json_output = {
+                "type": "translation.output.response",
+                "request": request_data,
+                "results": [result_entry],
+                "error": None
+            }
+        else:
+            # 1. Get content - now passing docs_url
+            original_content = get_content(project, docs_url=docs_url)
+            print(f"[DEBUG] Original content length: {len(original_content)}")
+
+            # 2. Preprocess content
+            to_translate = preprocess_content(original_content)
+            print(f"[DEBUG] Preprocessed content length: {len(to_translate)}")
+
+            # 3. Get full prompt
+            full_prompt = get_full_prompt(target_language, to_translate, additional_instruction) # Use extracted target_language
+
+            # 4. Translate
+            cb, translated_content_raw = llm_translate(full_prompt)
+            print(f"LLM Callback: {cb}")
+            print(f"[DEBUG] Raw translated content length: {len(translated_content_raw)}")
+
+            # Determine model used for metadata
+            model_used = ""
+            if os.environ.get("ANTHROPIC_API_KEY"):
+                model_used = "claude-sonnet-4-20250514 (Anthropic API)"
+            elif os.environ.get("AWS_BEARER_TOKEN_BEDROCK"):
+                model_used = "claude-3-7-sonnet-20250219-v1 (AWS Bedrock)"
+
+            # 5. Fill scaffold
+            translated_doc = fill_scaffold(original_content, to_translate, translated_content_raw)
+
+            # 6. Save the new translation
+            with open(translated_file_path, "w", encoding="utf-8") as f:
+                f.write(translated_doc)
+
+            response_message = f"✅ Successfully translated and saved {file_to_translate} (Project: {project})"
+
+            result_entry["translated_content"] = translated_doc
+            result_entry["status"] = "success"
+            result_entry["metadata"]["model_used"] = model_used
+
+            final_json_output = {
+                "type": "translation.output.response",
+                "request": request_data,
+                "results": [result_entry],
+                "error": None
+            }
+        print(f"[DEBUG] Final translated_doc content:\n{translated_doc}")
+
+        # Create a display version of translated_doc for the Markdown component
+        # This version will have problematic custom syntax removed for better rendering.
+        display_translated_doc = translated_doc
+
+        # Remove XML-style comments for display
+        display_translated_doc = re.sub(r"<!--.*?-->", "", display_translated_doc, flags=re.DOTALL)
+
+        # Remove <hfoptions> and <hfoption> tags and their content for display
+        display_translated_doc = re.sub(r"<hfoptions.*?>(.*?)</hfoptions>", "", display_translated_doc, flags=re.DOTALL)
+        display_translated_doc = re.sub(r"<hfoption.*?>(.*?)</hfoption>", "", display_translated_doc, flags=re.DOTALL)
+
+        return gr.Textbox(value=f"Translation Complete: {response_message}"), gr.Markdown(value=display_translated_doc), gr.Textbox(value=translated_doc), gr.Json(value=final_json_output)
+    except Exception as e:
+        error_message = f"Error during translation: {str(e)}"
+        # Ensure request_data is defined even in case of early errors for context
+        # If request_data was not successfully parsed, create a minimal one for error context
+        if not request_data:
+            request_data = {
+                "project": project if project else "unknown",
+                "target_language": "ko",
+                "source_language": "en",
+                "files": [
+                    {
+                        "repo_url": repo_url if repo_url else "unknown",
+                        "file_path": file_to_translate if file_to_translate else "unknown"
+                    }
+                ]
+            }
+        error_json_output = {
+            "type": "translation.output.response",
+            "request": request_data,
+            "results": [],
+            "error": error_message
+        }
+        return gr.Textbox(value=error_message), gr.Markdown(value=""), gr.Textbox(value=""), gr.Json(value=error_json_output)
+
+def update_status_mcp():
+    return gr.Textbox(value="Ready")
+
+def update_project_config_display(project):
+    """Update the project config display when project selection changes."""
+    if not project:
+        return ""
+    
+    # Since project_config is no longer used for repo_url, we'll just display the project name.
+    config_html = f"""
+### 📋 Project Configuration: {project}
+
+- **Name:** {project}
+"""
+    return config_html
+
+def generate_json_request(project, docs_url, additional_instruction, force_retranslate):
+    # Extract file_path and docs_path from docs_url
+    file_to_translate = ""
+    docs_path_extracted = ""
+    if "/blob/main/" in docs_url:
+        parts = docs_url.split("/blob/main/")
+        if len(parts) > 1:
+            file_to_translate = parts[1]
+            # Assuming docs_path is the part before the language directory and file name
+            docs_path_parts = file_to_translate.split("/")
+            if len(docs_path_parts) > 2: # Ensure there are enough parts for docs/source/en/file.md
+                docs_path_extracted = "/".join(docs_path_parts[:-2]) # Exclude language and filename
+            elif len(docs_path_parts) > 1: # Fallback if only docs/source/file.md (no language dir)
+                docs_path_extracted = "/".join(docs_path_parts[:-1]) # Exclude filename
+            else:
+                docs_path_extracted = "" # No valid docs_path found
+    elif "/blob/" in docs_url: # Handle other branches if necessary
+        parts = docs_url.split("/blob/")
+        if len(parts) > 1:
+            path_after_blob = parts[1]
+            branch_and_filepath = path_after_blob.split("/", 1)
+            if len(branch_and_filepath) > 1:
+                file_to_translate = branch_and_filepath[1]
+                docs_path_parts = file_to_translate.split("/")
+                if len(docs_path_parts) > 2: # Ensure there are enough parts for docs/source/en/file.md
+                    docs_path_extracted = "/".join(docs_path_parts[:-2]) # Exclude language and filename
+                elif len(docs_path_parts) > 1: # Fallback if only docs/source/file.md (no language dir)
+                    docs_path_extracted = "/".join(docs_path_parts[:-1]) # Exclude filename
+                else:
+                    docs_path_extracted = "" # No valid docs_path found
+
+    request_data = {
+        "files": [
+            {
+                "path": file_to_translate,
+                "repo_url": docs_url, # Use user-provided docs_url
+                "metadata": {
+                    "project": project,
+                    "docs_path": docs_path_extracted, # Include docs_path here
+                }
+            }
+        ],
+        "additional_instruction": additional_instruction,
+        "force_retranslate": force_retranslate,
+        "target_language": "ko", # Hardcoded target language for this server
+        "source_language": "en", # Hardcoded source language for this server
+    }
+    return json.dumps(request_data, indent=2)
+
+def create_mcp_interface():
+    with gr.Blocks(css="""
+        .markdown-scrollable {
+            overflow-y: auto;
+        }
+    """) as demo:
+        gr.Markdown("## Translation Module MCP Server")
+        
+        status_display = gr.Textbox(label="Status", interactive=False, value="Idle")
+        start_translate_btn = gr.Button("Start Translation (MCP)", elem_classes="action-button")
+
+        with gr.TabItem("Translate Inputs", id=0):
+            project_dropdown = gr.Radio(
+                choices=get_available_projects(),
+                label="🎯 Select Project",
+                value="transformers",
+            )
+            project_config_display = gr.Markdown(value=update_project_config_display("transformers"))
+            docs_url_input = gr.Textbox(
+                label="🔗 Documentation URL (Full blob URL)",
+                value="https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerator_selection.md",
+                placeholder="e.g., https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerator_selection.md",
+            )
+            additional_instruction = gr.Textbox(
+                label="📝 Additional instructions (Optional)",
+                placeholder="Example: Translate 'model' as '모델' consistently",
+                lines=2,
+            )
+            force_retranslate = gr.Checkbox(
+                label="🔄 Force Retranslate",
+                value=False,
+            )
+            
+            generate_json_btn = gr.Button("Generate JSON Request")
+            json_request_textbox = gr.Textbox(
+                label="JSON Request (for Translation)",
+                value="",
+                lines=10,
+                interactive=True,
+            )
+
+            with gr.Row():
+                translated_output = gr.Markdown(
+                    label="Translated Content (Markdown)",
+                    value="",
+                    elem_classes="markdown-scrollable",
+                    height=500, # Explicitly set height to enable scrolling
+                )
+                raw_text_output = gr.Textbox(
+                    label="Translated Content (Raw Text)",
+                    value="",
+                    lines=20, # Give it a reasonable default height
+                    interactive=False,
+                    elem_classes="markdown-scrollable", # Reuse scrollable class
+                )
+            json_output = gr.Json(
+                label="Raw JSON Output",
+                value={},
+            )
+
+        # Update project config display when project selection changes
+        project_dropdown.change(
+            fn=update_project_config_display,
+            inputs=[project_dropdown],
+            outputs=[project_config_display],
+        )
+        
+        # Connect generate_json_btn to generate_json_request function
+        generate_json_btn.click(
+            fn=generate_json_request,
+            inputs=[
+                project_dropdown,
+                docs_url_input,
+                additional_instruction,
+                force_retranslate
+            ],
+            outputs=[json_request_textbox],
+        )
+        
+        start_translate_btn.click(
+            fn=start_translate_handler_mcp,
+            inputs=[json_request_textbox],
+            outputs=[status_display, translated_output, raw_text_output, json_output],
+        )
+    return demo
+
+if __name__ == "__main__":
+    demo = create_mcp_interface()
+    demo.launch()
diff --git a/translator/content.py b/translator/content.py
index cb7e2a3..e3c3dad 100644
--- a/translator/content.py
+++ b/translator/content.py
@@ -3,7 +3,7 @@
 import string
 
 import requests
-from langchain.callbacks import get_openai_callback
+
 from langchain_anthropic import ChatAnthropic
 import boto3
 import json
@@ -12,21 +12,32 @@
 from translator.project_config import get_project_config
 
 
-def get_content(filepath: str, project: str = "transformers") -> str:
-    if filepath == "":
-        raise ValueError("No files selected for translation.")
+def get_content(project: str = "transformers", docs_url: str | None = None) -> str:
+    if not docs_url:
+        raise ValueError("docs_url must be provided to get_content.")
 
-    config = get_project_config(project)
-    # Extract repo path from repo_url (e.g., "huggingface/transformers")
-    repo_path = config.repo_url.replace("https://github.com/", "")
+    url = ""
+    print(f"[DEBUG] get_content received docs_url: {docs_url}")
+    if "/blob/" in docs_url:
+        # It's a full GitHub blob URL, convert to raw
+        url = docs_url.replace("https://github.com/", "https://raw.githubusercontent.com/").replace("/blob/", "/")
+    else:
+        # Assume it's a base GitHub repo URL, combine with filepath and main branch
+        # This case should ideally not be hit if docs_url is always a full blob URL
+        # but kept for robustness if the input format varies.
+        repo_owner_repo = docs_url.replace("https://github.com/", "")
+        # We need to extract the filepath from docs_url if it's not a full blob URL
+        # For now, raising an error if it's not a full blob URL to enforce input consistency.
+        raise ValueError("docs_url must be a full GitHub blob URL (e.g., containing /blob/).")
     
-    url = f"https://raw.githubusercontent.com/{repo_path}/main/{filepath}"
+    print(f"[DEBUG] Constructed content URL: {url}")
+
     response = requests.get(url)
     if response.status_code == 200:
         content = response.text
         return content
     else:
-        raise ValueError("Failed to retrieve content from the URL.", url)
+        raise ValueError(f"Failed to retrieve content from the URL: {url}. Status code: {response.status_code}")
 
 
 def preprocess_content(content: str) -> str:

From 4e61942c66e7301511a0194dcc014f56722596a6 Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 24 Nov 2025 01:14:02 -0800
Subject: [PATCH 2/4] Initial implementation of pr_generator MCP

---
 mcp/__init__.py          |   5 ++
 mcp/pr_uploader.py       | 167 +++++++++++++++++++++++++++++++++++++++
 pr_generator/agent.py    |   6 +-
 pr_generator/searcher.py |  12 ++-
 4 files changed, 184 insertions(+), 6 deletions(-)
 create mode 100644 mcp/__init__.py
 create mode 100644 mcp/pr_uploader.py

diff --git a/mcp/__init__.py b/mcp/__init__.py
new file mode 100644
index 0000000..a379474
--- /dev/null
+++ b/mcp/__init__.py
@@ -0,0 +1,5 @@
+"""Implementation of MCP server for Hugging Face i18n Agent"""
+
+from .pr_uploader import create_pr_agent_interface
+
+__all__ = ["create_pr_agent_interface"]
\ No newline at end of file
diff --git a/mcp/pr_uploader.py b/mcp/pr_uploader.py
new file mode 100644
index 0000000..4c18af7
--- /dev/null
+++ b/mcp/pr_uploader.py
@@ -0,0 +1,167 @@
+import gradio as gr
+import os
+import json
+import re
+from dotenv import load_dotenv
+
+load_dotenv() # Load environment variables from .env file
+
+from pr_generator.agent import GitHubPRAgent
+from pr_generator.searcher import find_reference_pr_simple_stream
+
+# Initialize GitHubPRAgent
+# These should be set as environment variables
+USER_OWNER = os.environ.get("GH_USER_OWNER", "your_github_username")
+USER_REPO = os.environ.get("GH_USER_REPO", "your_forked_repo_name")
+BASE_OWNER = os.environ.get("GH_BASE_OWNER", "huggingface")
+BASE_REPO = os.environ.get("GH_BASE_REPO", "transformers")
+
+pr_agent = GitHubPRAgent(
+    user_owner=USER_OWNER,
+    user_repo=USER_REPO,
+    base_owner=BASE_OWNER,
+    base_repo=BASE_REPO
+)
+
+def start_pr_generation_mcp(
+    reference_pr_url: str,
+    target_language: str,
+    filepath: str,
+    translated_doc_content: str,
+    base_branch: str = "main",
+):
+    # This function will call the GitHubPRAgent's workflow
+    # and return the results for display in Gradio.
+    # The actual implementation will involve calling pr_agent.run_translation_pr_workflow
+    # and handling its output.
+    
+    # Placeholder for actual PR generation logic
+    print(f"Starting PR generation with:")
+    print(f"  Reference PR URL: {reference_pr_url}")
+    print(f"  Target Language: {target_language}")
+    print(f"  Filepath: {filepath}")
+    print(f"  Translated Content Length: {len(translated_doc_content)} bytes")
+    print(f"  Base Branch: {base_branch}")
+
+    try:
+        result = pr_agent.run_translation_pr_workflow(
+            reference_pr_url=reference_pr_url,
+            target_language=target_language,
+            filepath=filepath,
+            translated_doc=translated_doc_content,
+            base_branch=base_branch,
+        )
+        
+        if result["status"] == "success":
+            message = f"✅ PR created successfully: {result['pr_url']}"
+            return gr.Textbox(value=message), gr.Json(value=result)
+        elif result["status"] == "partial_success":
+            message = f"⚠️ Partial success: {result['message']}"
+            return gr.Textbox(value=message), gr.Json(value=result)
+        else:
+            message = f"❌ Error during PR generation: {result['message']}"
+            return gr.Textbox(value=message), gr.Json(value=result)
+
+    except Exception as e:
+        error_message = f"❌ Unexpected error during PR generation: {str(e)}"
+        return gr.Textbox(value=error_message), gr.Json(value={"error": error_message})
+
+def search_reference_pr_mcp(target_language: str, context: str):
+    # This function will call the searcher agent and return the best PR URL.
+    # It will also stream the progress messages.
+    
+    search_generator = find_reference_pr_simple_stream(target_language=target_language, context=context)
+    
+    # Collect all messages and the final result
+    messages = []
+    final_result = None
+    try:
+        while True:
+            message = next(search_generator)
+            messages.append(message)
+            print(message) # Print to console for real-time feedback
+    except StopIteration as e:
+        final_result = e.value
+
+    if final_result and final_result.get("status") == "success":
+        pr_url = final_result.get("result", "").replace("Recommended PR URL: ", "")
+        return gr.Textbox(value="\n".join(messages)), gr.Textbox(value=pr_url)
+    else:
+        error_message = final_result.get("result", "Unknown error during PR search.") if final_result else "No result from PR search."
+        return gr.Textbox(value="\n".join(messages) + f"\n❌ {error_message}"), gr.Textbox(value="")
+
+
+def create_pr_agent_interface():
+    with gr.Blocks(css="""
+        .markdown-scrollable {
+            overflow-y: auto;
+        }
+    """) as demo:
+        gr.Markdown("## PR Agent Module MCP Server")
+        
+        pr_status_display = gr.Textbox(label="PR Generation Status", interactive=False, value="Idle")
+        start_pr_btn = gr.Button("Start PR Generation (MCP)", elem_classes="action-button")
+
+        with gr.TabItem("PR Generation Inputs", id=0):
+            gr.Markdown("### 🔍 Reference PR Search")
+            with gr.Row():
+                search_target_language = gr.Textbox(label="Target Language (for search)", value="korean")
+                search_context = gr.Textbox(label="Context (for search)", value="docs")
+                search_pr_btn = gr.Button("Search Reference PR")
+            search_output = gr.Textbox(label="Search Progress", interactive=False, lines=5)
+            recommended_pr_url = gr.Textbox(label="Recommended Reference PR URL", interactive=True)
+
+            gr.Markdown("### 📝 PR Generation Details")
+            reference_pr_url_input = gr.Textbox(
+                label="🔗 Reference PR URL",
+                value="https://github.com/huggingface/transformers/pull/24968",
+                placeholder="e.g., https://github.com/huggingface/transformers/pull/24968",
+            )
+            target_language_input = gr.Textbox(
+                label="🌐 Target Language",
+                value="ko",
+                placeholder="e.g., ko, ja, fr",
+            )
+            filepath_input = gr.Textbox(
+                label="📁 Original File Path (e.g., docs/source/en/accelerator_selection.md)",
+                value="docs/source/en/accelerator_selection.md",
+                placeholder="e.g., docs/source/en/accelerator_selection.md",
+            )
+            translated_doc_content_input = gr.Textbox(
+                label="📄 Translated Document Content",
+                value="# Translated Accelerator Selection\n\nThis is the translated content.",
+                lines=10,
+                interactive=True,
+            )
+            base_branch_input = gr.Textbox(
+                label="🌿 Base Branch (e.g., main)",
+                value="main",
+            )
+            
+            pr_json_output = gr.Json(
+                label="PR Generation Raw JSON Output",
+                value={},
+            )
+
+        search_pr_btn.click(
+            fn=search_reference_pr_mcp,
+            inputs=[search_target_language, search_context],
+            outputs=[search_output, recommended_pr_url],
+        )
+
+        start_pr_btn.click(
+            fn=start_pr_generation_mcp,
+            inputs=[
+                reference_pr_url_input,
+                target_language_input,
+                filepath_input,
+                translated_doc_content_input,
+                base_branch_input,
+            ],
+            outputs=[pr_status_display, pr_json_output],
+        )
+    return demo
+
+if __name__ == "__main__":
+    demo = create_pr_agent_interface()
+    demo.launch()
diff --git a/pr_generator/agent.py b/pr_generator/agent.py
index 8e43d5c..098986e 100644
--- a/pr_generator/agent.py
+++ b/pr_generator/agent.py
@@ -8,6 +8,7 @@
 import re
 import json
 from typing import Optional, Dict, List, Tuple, Any
+from github import Github
 
 # Load environment variables from .env file
 from dotenv import load_dotenv
@@ -21,16 +22,17 @@
 
 # Library imports and error handling
 try:
-    from github import Github, GithubException
     from github.GitRef import GitRef
     from langchain_anthropic import ChatAnthropic
 
     REQUIRED_LIBS_AVAILABLE = True
 except ImportError as e:
-    print(f"Required libraries are not installed: {e}")
+    print(f"DEBUG: ImportError in agent.py: {e}")
     print("Please run: pip install PyGithub boto3 langchain-anthropic")
     REQUIRED_LIBS_AVAILABLE = False
 
+print(f"DEBUG: REQUIRED_LIBS_AVAILABLE in agent.py: {REQUIRED_LIBS_AVAILABLE}")
+
 
 class GitHubPRAgent:
     """Agent class for GitHub PR creation"""
diff --git a/pr_generator/searcher.py b/pr_generator/searcher.py
index 7ade3a4..aebfc0b 100644
--- a/pr_generator/searcher.py
+++ b/pr_generator/searcher.py
@@ -7,6 +7,8 @@
 import re
 import logging
 from typing import List, Dict, Any, Optional
+from github import Github
+from langchain_core.tools import StructuredTool
 
 # Load environment variables
 from dotenv import load_dotenv
@@ -22,16 +24,18 @@
 # Langchain imports
 try:
     from langchain_anthropic import ChatAnthropic
-    from langchain.tools import StructuredTool
-    from langchain.agents import AgentExecutor, create_tool_calling_agent
+    from langchain_classic.agents import AgentExecutor
+    from langchain.agents import create_tool_calling_agent
     from langchain_core.prompts import ChatPromptTemplate
-    from github import Github
 
     REQUIRED_LIBS_AVAILABLE = True
 except ImportError as e:
-    print(f"Required libraries are not installed: {e}")
+    print(f"DEBUG: ImportError in searcher.py: {e}")
+    print("Please run: pip install PyGithub boto3 langchain-anthropic")
     REQUIRED_LIBS_AVAILABLE = False
 
+print(f"DEBUG: REQUIRED_LIBS_AVAILABLE in searcher.py: {REQUIRED_LIBS_AVAILABLE}")
+
 # Constants
 ANTHROPIC_MODEL_ID = "claude-sonnet-4-20250514"
 DEFAULT_TEMPERATURE = 0.0

From c487bad1b6e1e305b49d611b1ed32716358bd7cc Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 24 Nov 2025 04:07:24 -0800
Subject: [PATCH 3/4] Update pr generator module

---
 mcp/pr_uploader.py       | 106 +++++++++++++++++++++++++++++++--------
 pr_generator/agent.py    |  94 ++++++++++++++++++++++++++++++++--
 pr_generator/searcher.py |   3 +-
 3 files changed, 178 insertions(+), 25 deletions(-)

diff --git a/mcp/pr_uploader.py b/mcp/pr_uploader.py
index 4c18af7..cab7af3 100644
--- a/mcp/pr_uploader.py
+++ b/mcp/pr_uploader.py
@@ -27,45 +27,49 @@ def start_pr_generation_mcp(
     reference_pr_url: str,
     target_language: str,
     filepath: str,
-    translated_doc_content: str,
+    translated_filepath: str,
     base_branch: str = "main",
+    preview_mode: bool = False,
 ):
     # This function will call the GitHubPRAgent's workflow
     # and return the results for display in Gradio.
-    # The actual implementation will involve calling pr_agent.run_translation_pr_workflow
-    # and handling its output.
     
-    # Placeholder for actual PR generation logic
     print(f"Starting PR generation with:")
     print(f"  Reference PR URL: {reference_pr_url}")
     print(f"  Target Language: {target_language}")
     print(f"  Filepath: {filepath}")
-    print(f"  Translated Content Length: {len(translated_doc_content)} bytes")
+    print(f"  Translated Filepath: {translated_filepath}") # Pass the filepath directly
     print(f"  Base Branch: {base_branch}")
+    print(f"  Preview Mode: {preview_mode}") # Log preview mode status
 
     try:
         result = pr_agent.run_translation_pr_workflow(
             reference_pr_url=reference_pr_url,
             target_language=target_language,
             filepath=filepath,
-            translated_doc=translated_doc_content,
+            translated_filepath=translated_filepath, # Pass the filepath directly
             base_branch=base_branch,
+            preview_mode=preview_mode, # Pass preview_mode to the agent
         )
         
-        if result["status"] == "success":
+        if result["status"] == "preview":
+            message = "✨ PR Preview Generated Successfully!"
+            # Return preview data, and also enable the checkbox and button
+            return gr.Textbox(value=message), gr.Json(value=result["data"]), result["data"], gr.update(interactive=True), gr.update(interactive=True)
+        elif result["status"] == "success":
             message = f"✅ PR created successfully: {result['pr_url']}"
-            return gr.Textbox(value=message), gr.Json(value=result)
+            # On success, reset checkbox and button
+            return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
         elif result["status"] == "partial_success":
             message = f"⚠️ Partial success: {result['message']}"
-            return gr.Textbox(value=message), gr.Json(value=result)
+            return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
         else:
             message = f"❌ Error during PR generation: {result['message']}"
-            return gr.Textbox(value=message), gr.Json(value=result)
+            return gr.Textbox(value=message), gr.Json(value=result), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
 
     except Exception as e:
         error_message = f"❌ Unexpected error during PR generation: {str(e)}"
-        return gr.Textbox(value=error_message), gr.Json(value={"error": error_message})
-
+        return gr.Textbox(value=error_message), gr.Json(value={"error": error_message}), None, gr.update(value=False, interactive=False), gr.update(interactive=False)
 def search_reference_pr_mcp(target_language: str, context: str):
     # This function will call the searcher agent and return the best PR URL.
     # It will also stream the progress messages.
@@ -90,6 +94,46 @@ def search_reference_pr_mcp(target_language: str, context: str):
         error_message = final_result.get("result", "Unknown error during PR search.") if final_result else "No result from PR search."
         return gr.Textbox(value="\n".join(messages) + f"\n❌ {error_message}"), gr.Textbox(value="")
 
+def handle_pr_confirmation_mcp(preview_data: dict, approved: bool):
+    """Create the actual PR from a stored preview once the user approves it.
+
+    Args:
+        preview_data: Preview payload holding the PR parameters.
+        approved: Whether the approval checkbox was ticked.
+
+    Returns:
+        (status Textbox, Json output, checkbox update, button update).
+    """
+    # Every path resets and disables the confirmation checkbox and button.
+    reset_controls = (gr.update(value=False, interactive=False), gr.update(interactive=False))
+    if not approved:
+        return (gr.Textbox(value="❌ PR creation cancelled by user."), gr.Json(value=preview_data), *reset_controls)
+    if not preview_data:
+        return (gr.Textbox(value="❌ No preview data available to create PR."), gr.Json(value={}), *reset_controls)
+
+    try:
+        reference_pr_url = preview_data["reference_pr_url"]
+        target_language = preview_data["target_language"]
+        filepath = preview_data["filepath"]
+        # The workflow opens translated_filepath on local disk, so prefer an
+        # explicit local path and only fall back to the derived target path.
+        translated_filepath = preview_data.get("translated_filepath") or preview_data["target_filepath"]
+        base_branch = preview_data["base_branch_for_pr"].split(":")[-1]  # "owner:branch" -> "branch"
+        print(f"Executing PR creation for: {filepath} to {target_language}")
+        result = pr_agent.run_translation_pr_workflow(
+            reference_pr_url=reference_pr_url, target_language=target_language, filepath=filepath,
+            translated_filepath=translated_filepath, base_branch=base_branch,
+            preview_mode=False,  # Actual creation mode
+        )
+        if result["status"] == "success":
+            message = f"✅ PR created successfully: {result['pr_url']}"
+        elif result["status"] == "partial_success":
+            message = f"⚠️ Partial success: {result['message']}"
+        else:
+            message = f"❌ Error during PR creation: {result['message']}"
+        return (gr.Textbox(value=message), gr.Json(value=result), *reset_controls)
+    except Exception as e:
+        error_message = f"❌ Unexpected error during PR creation: {str(e)}"
+        return (gr.Textbox(value=error_message), gr.Json(value={"error": error_message}), *reset_controls)
 
 def create_pr_agent_interface():
     with gr.Blocks(css="""
@@ -100,7 +144,8 @@ def create_pr_agent_interface():
         gr.Markdown("## PR Agent Module MCP Server")
         
         pr_status_display = gr.Textbox(label="PR Generation Status", interactive=False, value="Idle")
-        start_pr_btn = gr.Button("Start PR Generation (MCP)", elem_classes="action-button")
+        with gr.Row(): # Use gr.Row to place buttons side-by-side
+            preview_pr_btn = gr.Button("Preview PR (JSON)", elem_classes="secondary-button") # New button for preview
 
         with gr.TabItem("PR Generation Inputs", id=0):
             gr.Markdown("### 🔍 Reference PR Search")
@@ -127,11 +172,12 @@ def create_pr_agent_interface():
                 value="docs/source/en/accelerator_selection.md",
                 placeholder="e.g., docs/source/en/accelerator_selection.md",
             )
-            translated_doc_content_input = gr.Textbox(
-                label="📄 Translated Document Content",
-                value="# Translated Accelerator Selection\n\nThis is the translated content.",
-                lines=10,
+            translated_filepath_input = gr.Textbox(
+                label="📄 Translated Document File Path (e.g., path/to/translated_file.md)",
+                value="translation_result/docs/source/ko/accelerator_selection.md",
+                lines=1,
                 interactive=True,
+                placeholder="e.g., translation_result/docs/source/ko/accelerator_selection.md",
             )
             base_branch_input = gr.Textbox(
                 label="🌿 Base Branch (e.g., main)",
@@ -142,6 +188,14 @@ def create_pr_agent_interface():
                 label="PR Generation Raw JSON Output",
                 value={},
             )
+            
+            # New UI for human approval
+            with gr.Row():
+                confirmation_checkbox = gr.Checkbox(label="I approve this PR preview and wish to proceed with actual PR creation.", interactive=False)
+                confirm_pr_btn = gr.Button("Confirm & Create PR", elem_classes="action-button", interactive=False)
+
+        # Hidden state to store preview data
+        pr_preview_state = gr.State(value=None)
 
         search_pr_btn.click(
             fn=search_reference_pr_mcp,
@@ -149,16 +203,28 @@ def create_pr_agent_interface():
             outputs=[search_output, recommended_pr_url],
         )
 
-        start_pr_btn.click(
+        preview_pr_btn.click( # Modified click event for preview button
             fn=start_pr_generation_mcp,
             inputs=[
                 reference_pr_url_input,
                 target_language_input,
                 filepath_input,
-                translated_doc_content_input,
+                translated_filepath_input, # Changed to use the new filepath input
                 base_branch_input,
+                gr.State(True), # Pass True for preview_mode
             ],
-            outputs=[pr_status_display, pr_json_output],
+            outputs=[pr_status_display, pr_json_output, pr_preview_state, confirmation_checkbox, confirm_pr_btn],
+        ).success(
+            fn=lambda x: [gr.update(interactive=True), gr.update(interactive=True)], # Enable checkbox and button
+            inputs=pr_preview_state, # Use output from start_pr_generation_mcp to trigger
+            outputs=[confirmation_checkbox, confirm_pr_btn],
+            queue=False,
+        )
+
+        confirm_pr_btn.click( # New click event for confirm button
+            fn=lambda preview_data, approved: handle_pr_confirmation_mcp(preview_data, approved),
+            inputs=[pr_preview_state, confirmation_checkbox],
+            outputs=[pr_status_display, pr_json_output, confirmation_checkbox, confirm_pr_btn], # Reset checkbox and button state
         )
     return demo
 
diff --git a/pr_generator/agent.py b/pr_generator/agent.py
index 098986e..8924ca2 100644
--- a/pr_generator/agent.py
+++ b/pr_generator/agent.py
@@ -23,6 +23,7 @@
 # Library imports and error handling
 try:
     from github.GitRef import GitRef
+    from github import GithubException
     from langchain_anthropic import ChatAnthropic
 
     REQUIRED_LIBS_AVAILABLE = True
@@ -418,23 +419,110 @@ def get_branch_info(self, owner: str, repo_name: str, branch_name: str) -> str:
         except Exception as e:
             return f"Failed to retrieve branch information: {str(e)}"
 
+    def _prepare_pr_data(
+        self,
+        reference_pr_url: str,
+        target_language: str,
+        filepath: str,
+        translated_filepath: str,
+        base_branch: str = "main",
+    ) -> Dict[str, Any]:
+        """Assemble the branch, commit and PR metadata used for a PR preview.
+
+        Args:
+            reference_pr_url: Reference PR handed to ``analyze_reference_pr``.
+            target_language: Language code substituted for ``/en/`` in ``filepath``.
+            filepath: Path of the source document.
+            translated_filepath: Local file with the translation; checked for readability.
+            base_branch: Branch name used to build ``base_branch_for_pr``.
+
+        Returns:
+            An error dict (``status`` and ``message``) or the ``preview_ready`` payload.
+        """
+        pr_analysis = self.analyze_reference_pr(reference_pr_url)
+        if "error" in pr_analysis:
+            return {"status": "error", "message": pr_analysis["error"]}
+
+        # Read only to verify the file exists and decodes; the content is unused here.
+        if translated_filepath:
+            print(f"DEBUG (agent.py -> _prepare_pr_data): Attempting to read translated file from: {translated_filepath}")
+            try:
+                with open(translated_filepath, 'r', encoding='utf-8') as f:
+                    f.read()
+            except FileNotFoundError:
+                return {"status": "error", "message": f"Translated file not found: {translated_filepath}"}
+            except Exception as e:
+                return {"status": "error", "message": f"Error reading translated file {translated_filepath}: {str(e)}"}
+
+        target_filepath = filepath.replace("/en/", f"/{target_language}/")
+        file_name = filepath.split("/")[-1]
+        branch_name = self.generate_branch_name_from_reference(
+            pr_analysis["head_branch"], target_language, file_name
+        )
+        commit_message = self.generate_commit_message_from_reference(
+            [commit["message"] for commit in pr_analysis["commits"]], target_language, file_name
+        )
+        pr_title, pr_body = self.generate_pr_content_from_reference(
+            pr_analysis["title"], pr_analysis["body"], target_language, filepath, target_filepath, file_name
+        )
+
+        return {
+            "status": "preview_ready",
+            "reference_pr_url": reference_pr_url,
+            "target_language": target_language,
+            "filepath": filepath,
+            # Local path of the translation, kept so a later confirmation step can
+            # re-open the same file (target_filepath is derived from filepath instead).
+            "translated_filepath": translated_filepath,
+            "branch_name": branch_name,
+            "commit_message": commit_message,
+            "target_filepath": target_filepath,
+            "pr_title": pr_title,
+            "pr_body": pr_body,
+            "head_branch_for_pr": f"{self.user_owner}:{branch_name}",
+            "base_branch_for_pr": f"{self.base_owner}:{base_branch}",
+        }
+
     def run_translation_pr_workflow(
         self,
         reference_pr_url: str,
         target_language: str,
         filepath: str,
-        translated_doc: str,
+        translated_filepath: str, # Changed to accept filepath
         base_branch: str = "main",
+        preview_mode: bool = False,
     ) -> Dict[str, Any]:
         """Execute translation document PR creation workflow."""
         try:
+            if preview_mode:
+                print("🚀 Running in preview mode...")
+                preview_data = self._prepare_pr_data(
+                    reference_pr_url, target_language, filepath, translated_filepath, base_branch
+                )
+                if preview_data["status"] == "error":
+                    return preview_data # Return error from preparation
+                return {"status": "preview", "data": preview_data}
+
+
+            # If not in preview mode, read the translated content from the file
+            translated_doc_content = ""
+            if translated_filepath:
+                try:
+                    with open(translated_filepath, 'r', encoding='utf-8') as f:
+                        translated_doc_content = f.read()
+                except FileNotFoundError:
+                    error_message = f"❌ Translated file not found: {translated_filepath}"
+                    return {"status": "error", "message": error_message, "error_details": error_message}
+                except Exception as e:
+                    error_message = f"❌ Error reading translated file {translated_filepath}: {str(e)}"
+                    return {"status": "error", "message": error_message, "error_details": error_message}
+
             # 1. Analyze reference PR
             print(f"🔍 Analyzing reference PR: {reference_pr_url}")
             pr_analysis = self.analyze_reference_pr(reference_pr_url)
 
             if "error" in pr_analysis:
                 return {"status": "error", "message": pr_analysis["error"]}
-
             print("Reference PR analysis completed")
 
             # 2. Generate translation file path and branch name
@@ -483,7 +571,7 @@ def run_translation_pr_workflow(
                 self.user_repo,
                 target_filepath,
                 commit_message,
-                translated_doc,
+                translated_doc_content, # Pass the read content
                 branch_name,
             )
 
diff --git a/pr_generator/searcher.py b/pr_generator/searcher.py
index aebfc0b..d517db3 100644
--- a/pr_generator/searcher.py
+++ b/pr_generator/searcher.py
@@ -24,8 +24,7 @@
 # Langchain imports
 try:
     from langchain_anthropic import ChatAnthropic
-    from langchain_classic.agents import AgentExecutor
-    from langchain.agents import create_tool_calling_agent
+    from langchain.agents import create_tool_calling_agent, AgentExecutor # Explicitly import AgentExecutor
     from langchain_core.prompts import ChatPromptTemplate
 
     REQUIRED_LIBS_AVAILABLE = True

From 51cb24a29799dc6694a8143d5e3b070862d88140 Mon Sep 17 00:00:00 2001
From: wony617 <49024958+Jwaminju@users.noreply.github.com>
Date: Mon, 24 Nov 2025 04:08:01 -0800
Subject: [PATCH 4/4] Delete wrong files

---
 .../docs/source/en/accelerator_selection.md   | 127 ------------------
 1 file changed, 127 deletions(-)
 delete mode 100644 translation_result/docs/source/en/accelerator_selection.md

diff --git a/translation_result/docs/source/en/accelerator_selection.md b/translation_result/docs/source/en/accelerator_selection.md
deleted file mode 100644
index 58cbfa1..0000000
--- a/translation_result/docs/source/en/accelerator_selection.md
+++ /dev/null
@@ -1,127 +0,0 @@
-<!--Copyright 2025 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
-
-# 가속기 선택 [[accelerator-selection]]
-
-분산 학습 중에는 사용할 가속기(CUDA, XPU, MPS, HPU 등)의 수와 순서를 지정할 수 있습니다. 이는 서로 다른 컴퓨팅 성능을 가진 가속기가 있을 때 더 빠른 가속기를 먼저 사용하고 싶은 경우에 유용할 수 있습니다. 또는 사용 가능한 가속기의 일부만 사용할 수도 있습니다. 선택 과정은 [DistributedDataParallel](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html)과 [DataParallel](https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html) 모두에서 작동합니다. Accelerate나 [DeepSpeed integration](./main_classes/deepspeed)는 필요하지 않습니다.
-
-이 가이드는 사용할 가속기의 수와 사용 순서를 선택하는 방법을 보여줍니다.
-
-## 가속기 수 [[number-of-accelerators]]
-
-예를 들어, 4개의 가속기가 있고 처음 2개만 사용하고 싶다면 아래 명령을 실행하세요.
-
-<hfoptions id="select-accelerator">
-<hfoption id="torchrun">
-
-`--nproc_per_node`를 사용하여 사용할 가속기 수를 선택합니다.
-
-```bash
-torchrun --nproc_per_node=2  trainer-program.py ...
-```
-
-</hfoption>
-<hfoption id="Accelerate">
-
-`--num_processes`를 사용하여 사용할 가속기 수를 선택합니다.
-
-```bash
-accelerate launch --num_processes 2 trainer-program.py ...
-```
-
-</hfoption>
-<hfoption id="DeepSpeed">
-
-`--num_gpus`를 사용하여 사용할 GPU 수를 선택합니다.
-
-```bash
-deepspeed --num_gpus 2 trainer-program.py ...
-```
-
-</hfoption>
-</hfoptions>
-
-## 가속기 순서 [[order-of-accelerators]]
-사용할 특정 가속기와 그 순서를 선택하려면 하드웨어에 적합한 환경 변수를 사용하세요. 이는 종종 각 실행에 대해 명령줄에서 설정되지만, `~/.bashrc`나 다른 시작 구성 파일에 추가할 수도 있습니다.
-
-예를 들어, 4개의 가속기(0, 1, 2, 3)가 있고 가속기 0과 2만 실행하고 싶다면:
-
-<hfoptions id="accelerator-type">
-<hfoption id="CUDA">
-
-```bash
-CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py ...
-```
-
-GPU 0과 2만 PyTorch에서 "보이며" 각각 `cuda:0`과 `cuda:1`로 매핑됩니다.  
-순서를 바꾸려면 (GPU 2를 `cuda:0`으로, GPU 0을 `cuda:1`로 사용):
-
-
-```bash
-CUDA_VISIBLE_DEVICES=2,0 torchrun trainer-program.py ...
-```
-
-GPU 없이 실행하려면:
-
-```bash
-CUDA_VISIBLE_DEVICES= python trainer-program.py ...
-```
-
-`CUDA_DEVICE_ORDER`를 사용하여 CUDA 장치의 순서를 제어할 수도 있습니다:
-
-- PCIe 버스 ID 순서 (`nvidia-smi`와 일치):
-
-    ```bash
-$hf_i18n_placeholder21export CUDA_DEVICE_ORDER=PCI_BUS_ID
-    ```
-
-- 컴퓨팅 성능 순서 (가장 빠른 것부터):
-
-    ```bash
-    export CUDA_DEVICE_ORDER=FASTEST_FIRST
-    ```
-
-</hfoption>
-<hfoption id="Intel XPU">
-
-```bash
-ZE_AFFINITY_MASK=0,2 torchrun trainer-program.py ...
-```
-
-XPU 0과 2만 PyTorch에서 "보이며" 각각 `xpu:0`과 `xpu:1`로 매핑됩니다.  
-순서를 바꾸려면 (XPU 2를 `xpu:0`으로, XPU 0을 `xpu:1`로 사용):
-
-```bash
-ZE_AFFINITY_MASK=2,0 torchrun trainer-program.py ...
-```
-
-
-다음을 사용하여 Intel XPU의 순서를 제어할 수도 있습니다:
-
-```bash
-export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
-```
-
-Intel XPU에서의 장치 열거 및 정렬에 대한 자세한 정보는 [Level Zero](https://github.com/oneapi-src/level-zero/blob/master/README.md?plain=1#L87) 문서를 참조하세요.
-
-</hfoption>
-</hfoptions>
-
-
-
-> [!WARNING]
-> 환경 변수는 명령줄에 추가하는 대신 내보낼 수 있습니다. 환경 변수가 어떻게 설정되었는지 잊어버리고 잘못된 가속기를 사용하게 될 수 있어 혼란을 야기할 수 있으므로 권장하지 않습니다. 대신, 같은 명령줄에서 특정 훈련 실행을 위해 환경 변수를 설정하는 것이 일반적인 관례입니다.
-```
\ No newline at end of file