Cisco-Talos · DavidJBianco · Apr 27, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/Makefile b/Makefile
@@ -43,8 +43,10 @@ coverage-html: coverage
 	open htmlcov/index.html || echo "Open htmlcov/index.html in your browser to view the coverage report."
 
 .PHONY: container-local
+DOCKER_TAG := $(shell git branch --show-current | tr -c '[:alnum:]._-' '-')
+
 container-local:
-	docker buildx build -t ghcr.io/cisco-foundation-ai/peak-assistant:$(shell git branch --show-current) --load .
+	docker buildx build -t ghcr.io/cisco-foundation-ai/peak-assistant:$(DOCKER_TAG) --load .
 
 
 .PHONY: container-run
@@ -56,4 +58,4 @@ container-run: container-local
 		--mount "type=bind,src=$(PWD)/.env,target=/home/peakassistant/.env" \
 		--mount "type=bind,src=$(PWD)/mcp_servers.json,target=/home/peakassistant/mcp_servers.json" \
 		-p "127.0.0.1:8501:8501" \
-		ghcr.io/cisco-foundation-ai/peak-assistant:$(shell git branch --show-current)
+		ghcr.io/cisco-foundation-ai/peak-assistant:$(DOCKER_TAG)
diff --git a/evaluations/research-agent-team-eval/evaluator.py b/evaluations/research-agent-team-eval/evaluator.py
@@ -36,6 +36,7 @@
 import argparse
 import asyncio
 import aiohttp
+import ipaddress
 from typing import Any, Dict, List, Tuple, Optional
 from collections import defaultdict
 from dataclasses import dataclass, field
@@ -985,8 +986,46 @@ async def evaluate_url_validity_async(
 
         urls = re.findall(r"https?://[^\s\)]+", report)
 
+        def is_safe_public_url(url: str) -> bool:
+            """Block private/internal URL targets to reduce SSRF risk."""
+            try:
+                parsed = urlparse(url)
+                if parsed.scheme not in {"http", "https"}:
+                    return False
+                if not parsed.hostname:
+                    return False
+
+                hostname = parsed.hostname.strip("[]").lower()
+                if hostname in {"localhost", "localhost.localdomain"}:
+                    return False
+                if hostname.endswith(".local"):
+                    return False
+
+                try:
+                    ip = ipaddress.ip_address(hostname)
+                    if (
+                        ip.is_private
+                        or ip.is_loopback
+                        or ip.is_link_local
+                        or ip.is_multicast
+                        or ip.is_reserved
+                        or ip.is_unspecified
+                    ):
+                        return False
+                except ValueError:
+                    # Hostname is not a direct IP literal; keep it eligible.
+                    pass
+
+                return True
+            except Exception:
+                return False
+
+        safe_urls = [url for url in urls if is_safe_public_url(url)]
+        blocked_urls = [url for url in urls if not is_safe_public_url(url)]
+
         results = {
             "total_urls": len(urls),
+            "blocked_urls": len(blocked_urls),
             "valid_urls": 0,
             "invalid_urls": 0,
             "timeout_urls": 0,
@@ -995,19 +1034,19 @@ async def evaluate_url_validity_async(
         }
 
         # Decide which URLs to check
-        if len(urls) <= 20:
+        if len(safe_urls) <= 20:
             # Check all URLs if 20 or fewer
-            urls_to_check = urls
-            results["sample_size"] = len(urls)
+            urls_to_check = safe_urls
+            results["sample_size"] = len(safe_urls)
         else:
             # Random sample of 20 if more than 20
-            urls_to_check = random.sample(urls, 20)
+            urls_to_check = random.sample(safe_urls, 20)
             results["sample_size"] = 20
 
         async def check_url(session, url):
             try:
                 async with session.head(
-                    url, timeout=5, allow_redirects=True
+                    url, timeout=5, allow_redirects=False
                 ) as response:
                     if response.status < 400:
                         return url, "valid"
@@ -1039,13 +1078,15 @@ async def check_url(session, url):
             score = 0
 
         # Update feedback to indicate sampling
-        if len(urls) > 20:
-            feedback = f"{results['valid_urls']}/{results['sample_size']} URLs valid (random sample from {len(urls)} total)"
+        if len(safe_urls) > 20:
+            feedback = f"{results['valid_urls']}/{results['sample_size']} URLs valid (random sample from {len(safe_urls)} eligible)"
         else:
             feedback = f"{results['valid_urls']}/{results['sample_size']} URLs valid"
 
         if results["broken_links"]:
             feedback += f", {len(results['broken_links'])} broken"
+        if blocked_urls:
+            feedback += f", skipped {len(blocked_urls)} private/internal URL(s)"
 
         return MetricResult(score=score, details=results, feedback=feedback)
 

diff --git a/peak_assistant/able_assistant/__init__.py b/peak_assistant/able_assistant/__init__.py
@@ -20,7 +20,7 @@
 #
 # SPDX-License-Identifier: MIT
 
-from typing import List
+from typing import List, Optional
 
 from autogen_core.models import UserMessage, SystemMessage
 
@@ -32,7 +32,7 @@ async def able_table(
     research_document: str,
     local_data_document: str,
     local_context: str,
-    previous_run: list[SystemMessage | UserMessage] = list(),
+    previous_run: Optional[list[SystemMessage | UserMessage]] = None,
 ) -> str:
     """
     Generate a PEAK ABLE table based on the given hypothesis and research document.

diff --git a/peak_assistant/data_assistant/__init__.py b/peak_assistant/data_assistant/__init__.py
@@ -20,6 +20,8 @@
 #
 # SPDX-License-Identifier: MIT
 
+from typing import Optional
+
 from autogen_agentchat.base import TaskResult
 from autogen_agentchat.messages import TextMessage
 from autogen_agentchat.agents import AssistantAgent
@@ -38,7 +40,7 @@ async def identify_data_sources(
     able_info: str,
     local_context: str,
     verbose: bool = False,
-    previous_run: list = list(),
+    previous_run: Optional[list] = None,
     mcp_server_group: str = "data_discovery",
     msg_preprocess_callback=None,
     msg_preprocess_kwargs=None,

diff --git a/peak_assistant/data_assistant/__main__.py b/peak_assistant/data_assistant/__main__.py
@@ -87,6 +87,11 @@ def main() -> None:
         action="store_true",
         help="Skip user feedback and automatically accept the generated data discovery report"
     )
+    parser.add_argument(
+        "--debug-agents",
+        action="store_true",
+        help="Enable agent debug logging to msgs.txt and results.txt"
+    )
     args = parser.parse_args()
 
     # Load environment variables
@@ -156,6 +161,16 @@ def main() -> None:
             exit(1)
 
     messages: List[TextMessage] = list()
+    debug_agents_opts = dict()
+
+    if args.debug_agents:
+        debug_agents_opts = {
+            "msg_preprocess_callback": preprocess_messages_logging,
+            "msg_preprocess_kwargs": {"agent_id": "data-discovery"},
+            "msg_postprocess_callback": postprocess_messages_logging,
+            "msg_postprocess_kwargs": {"agent_id": "data-discovery"},
+        }
+
     while True:
         # Run the hypothesizer asynchronously
         data_sources = asyncio.run(
@@ -167,10 +182,7 @@ def main() -> None:
                 local_context=local_context,
                 verbose=args.verbose,
                 previous_run=messages,
-                msg_preprocess_callback=preprocess_messages_logging,
-                msg_preprocess_kwargs={"agent_id": "data-discovery"},
-                msg_postprocess_callback=postprocess_messages_logging,
-                msg_postprocess_kwargs={"agent_id": "data-discovery"},
+                **debug_agents_opts,
             )
         )
 

diff --git a/peak_assistant/hypothesis_assistant/hypothesis_refiner_cli.py b/peak_assistant/hypothesis_assistant/hypothesis_refiner_cli.py
@@ -25,7 +25,8 @@
 import os
 import sys
 import argparse
-from typing import List
+import traceback
+from typing import List, Optional
 from dotenv import load_dotenv
 import asyncio
 
@@ -51,7 +52,7 @@ async def refiner(
     research_document: str,
     local_data_document: str,
     verbose: bool = False,
-    previous_run: list = list(),
+    previous_run: Optional[list] = None,
     msg_preprocess_callback=None,
     msg_preprocess_kwargs=None,
     msg_postprocess_callback=None,
@@ -354,24 +355,19 @@ async def refiner(
             msgs=messages, **(msg_preprocess_kwargs or {})
         )
 
-    try:
-        # Run the team asynchronously
-        if verbose:
-            result = await Console(team.run_stream(task=messages), output_stats=True)
-        else:
-            result = await team.run(task=messages)
+    # Run the team asynchronously
+    if verbose:
+        result = await Console(team.run_stream(task=messages), output_stats=True)
+    else:
+        result = await team.run(task=messages)
 
-        # Postprocess the result
-        if msg_postprocess_callback:
-            result = msg_postprocess_callback(
-                result=result, **(msg_postprocess_kwargs or {})
-            )
+    # Postprocess the result
+    if msg_postprocess_callback:
+        result = msg_postprocess_callback(
+            result=result, **(msg_postprocess_kwargs or {})
+        )
 
-        # Access the content from the CreateResult object
-        return result  # Use the correct attribute to access the generated content
-    except Exception as e:
-        print(f"Error while refining hypotheses: {e}")
-        raise Exception("An error occurred while refining the hypothesis.") from e
+    return result
 
 
 def main() -> None:
@@ -416,6 +412,11 @@ def main() -> None:
         help="Skip user feedback and automatically accept the refined hypothesis",
         default=False,
     )
+    parser.add_argument(
+        "--debug-agents",
+        action="store_true",
+        help="Enable agent debug logging to msgs.txt and results.txt",
+    )
 
     # Parse the arguments
     args = parser.parse_args()
@@ -480,22 +481,35 @@ def main() -> None:
 
     messages: List[TextMessage] = list()
     current_hypothesis = args.hypothesis
+    debug_agents_opts = dict()
+
+    if args.debug_agents:
+        debug_agents_opts = {
+            "msg_preprocess_callback": preprocess_messages_logging,
+            "msg_preprocess_kwargs": {"agent_id": "hypothesis-refiner"},
+            "msg_postprocess_callback": postprocess_messages_logging,
+            "msg_postprocess_kwargs": {"agent_id": "hypothesis-refiner"},
+        }
+
     while True:
         # Run the hypothesizer asynchronously
-        response = asyncio.run(
-            refiner(
-                hypothesis=current_hypothesis,
-                local_context=local_context or "",
-                research_document=research_data,
-                local_data_document=local_data or "",
-                verbose=args.verbose,
-                previous_run=messages,
-                msg_preprocess_callback=preprocess_messages_logging,
-                msg_preprocess_kwargs={"agent_id": "hypothesis-refiner"},
-                msg_postprocess_callback=postprocess_messages_logging,
-                msg_postprocess_kwargs={"agent_id": "hypothesis-refiner"},
+        try:
+            response = asyncio.run(
+                refiner(
+                    hypothesis=current_hypothesis,
+                    local_context=local_context or "",
+                    research_document=research_data,
+                    local_data_document=local_data or "",
+                    verbose=args.verbose,
+                    previous_run=messages,
+                    **debug_agents_opts,
+                )
             )
-        )
+        except Exception as e:
+            print(f"Error refining hypothesis: {e}", file=sys.stderr)
+            if args.verbose:
+                traceback.print_exc()
+            sys.exit(1)
 
         # Extract the refined hypothesis using the centralized extractor
         current_hypothesis, acceptance_msg = extract_refined_hypothesis(response, original_hypothesis=current_hypothesis)

diff --git a/peak_assistant/peak_mcp/__main__.py b/peak_assistant/peak_mcp/__main__.py
@@ -296,7 +296,12 @@ async def hypothesizer(
     """
     try:
         user_input = ""
-        result = await async_hypothesizer(user_input, research_document, local_context, local_data_search_results)
+        result = await async_hypothesizer(
+            user_input=user_input,
+            research_document=research_document,
+            local_data_document=local_data_search_results,
+            local_context=local_context,
+        )
         return embeddable_object(data=result)
     except Exception as e:
         return embeddable_object(data=f"Error during hypothesis generation: {str(e)}")

diff --git a/peak_assistant/planning_assistant/__init__.py b/peak_assistant/planning_assistant/__init__.py
@@ -20,6 +20,8 @@
 #
 # SPDX-License-Identifier: MIT
 
+from typing import Optional
+
 from autogen_agentchat.messages import TextMessage
 from autogen_agentchat.agents import AssistantAgent
 from autogen_agentchat.teams import RoundRobinGroupChat
@@ -38,7 +40,7 @@ async def plan_hunt(
     data_discovery: str,
     local_context: str,
     verbose: bool = False,
-    previous_run: list = list(),
+    previous_run: Optional[list] = None,
     msg_preprocess_callback=None,
     msg_preprocess_kwargs=None,
     msg_postprocess_callback=None,

diff --git a/peak_assistant/planning_assistant/__main__.py b/peak_assistant/planning_assistant/__main__.py
@@ -95,6 +95,11 @@ def main() -> None:
         action="store_true",
         help="Skip user feedback and automatically accept the generated hunt plan"
     )
+    parser.add_argument(
+        "--debug-agents",
+        action="store_true",
+        help="Enable agent debug logging to msgs.txt and results.txt"
+    )
     args = parser.parse_args()
 
     # Load environment variables
@@ -177,6 +182,16 @@ def main() -> None:
             exit(1)
 
     messages: List[TextMessage] = list()
+    debug_agents_opts = dict()
+
+    if args.debug_agents:
+        debug_agents_opts = {
+            "msg_preprocess_callback": preprocess_messages_logging,
+            "msg_preprocess_kwargs": {"agent_id": "hunt-planner"},
+            "msg_postprocess_callback": postprocess_messages_logging,
+            "msg_postprocess_kwargs": {"agent_id": "hunt-planner"},
+        }
+
     while True:
         # Run the hypothesizer asynchronously
         data_sources = asyncio.run(
@@ -189,10 +204,7 @@ def main() -> None:
                 local_context=local_context or "",
                 verbose=args.verbose,
                 previous_run=messages,
-                msg_preprocess_callback=preprocess_messages_logging,
-                msg_preprocess_kwargs={"agent_id": "hunt-planner"},
-                msg_postprocess_callback=postprocess_messages_logging,
-                msg_postprocess_kwargs={"agent_id": "hunt-planner"},
+                **debug_agents_opts,
             )
         )