From cfa52cb2eae713e3d368847036cded11633cb90c Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sat, 31 May 2025 14:01:14 -0700 Subject: [PATCH 01/12] add tinydb and a model config store adds the crud operations for model configs --- core/data_store/__init__.py | 0 core/data_store/model_config_store.py | 88 +++++++++++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 89 insertions(+) create mode 100644 core/data_store/__init__.py create mode 100644 core/data_store/model_config_store.py diff --git a/core/data_store/__init__.py b/core/data_store/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/data_store/model_config_store.py b/core/data_store/model_config_store.py new file mode 100644 index 0000000..f25f21b --- /dev/null +++ b/core/data_store/model_config_store.py @@ -0,0 +1,88 @@ +from pathlib import Path +from tinydb import TinyDB, Query +from typing import List, Dict, Any, Optional +import uuid + +DB_DIR = Path.home() / ".compliant-llm" +CONFIG_DB_FILE = DB_DIR / "model_config.json" + +# Ensure the .compliant-llm directory exists +DB_DIR.mkdir(parents=True, exist_ok=True) + +_db_instance = None + +def _get_table(): + global _db_instance + if _db_instance is None: + _db_instance = TinyDB(CONFIG_DB_FILE) + return _db_instance.table('model_config') + +def save_config(runner_config_data: Dict[str, Any], profile_name: str | None = None) -> None: + """Saves or updates a model configuration profile.""" + table = _get_table() + + # Ensure 'id' exists, add if not + if 'id' not in runner_config_data: + runner_config_data['id'] = str(uuid.uuid4()) + + # Ensure 'past_runs' exists if it's a new config or not present + if 'past_runs' not in runner_config_data: + runner_config_data['past_runs'] = [] + + # Add/update profile_name within the document for easier access if needed + if profile_name is not None: + runner_config_data['profile_name'] = profile_name + document_to_store = runner_config_data + + ConfigQuery = Query() + table.upsert(document_to_store, ConfigQuery.id == runner_config_data['id']) + print(f"Config '{profile_name}' saved.") + +def get_config(id: str) -> Optional[Dict[str, Any]]: + """Retrieves a specific model configuration profile.""" + table = _get_table() + ConfigQuery = Query() + return table.get(ConfigQuery.id == id) + +def list_configs() -> List[Dict[str, Any]]: + """Lists all saved model configuration profiles.""" + table = _get_table() + return table.all() + +def delete_config(id: str) -> bool: + """Deletes a model configuration profile. 
Returns True if deleted.""" + table = _get_table() + ConfigQuery = Query() + deleted_ids = table.remove(ConfigQuery.id == id) + return len(deleted_ids) > 0 + +def add_report_to_config(id: str, report_file_path: str) -> bool: + """Adds a report file path to the 'past_runs' list of a specific config.""" + table = _get_table() + ConfigQuery = Query() + config_doc = table.get(ConfigQuery.id == id) + + if not config_doc: + print(f"Error: Config profile '{id}' not found.") + return False + + # Ensure past_runs is a list + if 'past_runs' not in config_doc or not isinstance(config_doc['past_runs'], list): + config_doc['past_runs'] = [] + + # Avoid duplicate entries + if report_file_path not in config_doc['past_runs']: + config_doc['past_runs'].append(report_file_path) + table.upsert(config_doc, ConfigQuery.id == id) + print(f"Report '{report_file_path}' added to config '{id}'.") + return True + else: + print(f"Report '{report_file_path}' already exists in config '{id}'.") + return False + +def close_db(): + """Closes the database connection.""" + global _db_instance + if _db_instance: + _db_instance.close() + _db_instance = None diff --git a/pyproject.toml b/pyproject.toml index 30845dc..266e6c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "aiofiles", "plotly", "psutil", + "tinydb", "markdown", "opentelemetry-api", "opentelemetry-sdk", From 9b2ed057fb58118e58676d2e6ea9023eadb47367 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sun, 1 Jun 2025 17:59:14 -0700 Subject: [PATCH 02/12] use the model config store in the ui adapter wip: changes to the dashboard to test the ui adapter changes --- core/config_manager/ui_adapter.py | 143 ++++++++++++++++++++++-------- ui/dashboard.py | 67 ++++++++++---- 2 files changed, 158 insertions(+), 52 deletions(-) diff --git a/core/config_manager/ui_adapter.py b/core/config_manager/ui_adapter.py index 7ecf3d2..ba82865 100644 --- a/core/config_manager/ui_adapter.py +++ b/core/config_manager/ui_adapter.py @@ -6,6 +6,7 @@ from typing import Dict, List, Any, Optional from .config import ConfigManager, DEFAULT_REPORTS_DIR from core.runner import execute_prompt_tests_with_orchestrator +from core.data_store import model_config_store from rich.console import Console from rich.progress import ( Progress, SpinnerColumn, TextColumn, TimeElapsedColumn @@ -30,45 +31,87 @@ def __init__(self, config_manager: Optional[ConfigManager] = None): "output_path": {"path": str(DEFAULT_REPORTS_DIR), "filename": "report"}, # Default output path } - def run_test(self, prompt: str, strategies: List[str], config: Dict[str, Any]) -> Dict[str, Any]: + def run_test(self, config_id: str, prompt_override: Optional[str] = None, strategies_override: Optional[List[str]] = None) -> Dict[str, Any]: """ - Run tests with UI-specific configuration. - + Run tests using a stored configuration profile, with optional overrides. + Args: - prompt: The system prompt to test - strategies: List of test strategies to use - + config_id: The ID of the configuration profile to use. + prompt_override: Optional new prompt content to use for this run. + strategies_override: Optional new list of strategies to use for this run. + Returns: - Dictionary containing test results - + Dictionary containing test results. + Raises: - ValueError: If required parameters are missing + ValueError: If the configuration profile is not found or required parameters are missing. 
""" - if not prompt: - raise ValueError("Prompt is required") - if not strategies: - raise ValueError("At least one strategy is required") + full_runner_config = self.get_profile(config_id) + if not full_runner_config: + raise ValueError(f"Configuration profile with ID '{config_id}' not found.") + + # Make a copy to avoid modifying the stored config directly with temporary overrides + test_run_config = full_runner_config.copy() + test_run_config['provider'] = test_run_config.get('provider', {}).copy() # Ensure provider dict is also a copy + + # Apply overrides if any + if prompt_override is not None: + # Assuming prompt is stored as {'content': '...'} + if 'prompt' not in test_run_config or not isinstance(test_run_config.get('prompt'), dict): + test_run_config['prompt'] = {} + test_run_config['prompt']['content'] = prompt_override + # If your runner_config stores prompt directly as a string: + # test_run_config['prompt'] = prompt_override + + if strategies_override is not None: + test_run_config['strategies'] = strategies_override + + # Ensure essential keys are present (they should be from the stored config) + if 'prompt' not in test_run_config or (isinstance(test_run_config.get('prompt'), dict) and 'content' not in test_run_config.get('prompt', {})): + if prompt_override is None: # only raise if no override was given + raise ValueError("Prompt content is missing in the configuration and no override provided.") + if 'strategies' not in test_run_config or not test_run_config['strategies']: + if strategies_override is None: # only raise if no override was given + raise ValueError("Strategies are missing in the configuration and no override provided.") + if 'provider_name' not in test_run_config or 'provider' not in test_run_config or 'model' not in test_run_config['provider']: + raise ValueError("Provider information (provider_name, model) is missing in the configuration.") + + # To be deleted: API key will be loaded from tinydb + # API Key Handling - API key is NOT stored, fetched at runtime + # provider_details = test_run_config.get('provider', {}) + # The provider_name in runner_config might be just 'openai', not 'openai/gpt-4o' + # The model field usually contains the specific model like 'gpt-4o' + # The API key usually depends on the base provider (e.g., OPENAI_API_KEY) + # Let's assume 'provider_name' in the config root is the base provider e.g. 
'openai' + # base_provider_name = test_run_config.get('provider_name', '').split('/')[0] + # api_key_env_var = f"{base_provider_name.upper()}_API_KEY" + # api_key = os.getenv(api_key_env_var) or get_key(".env", api_key_env_var) - # Create test configuration - api_key_key = f"{config['provider_name'].upper()}_API_KEY" - api_key = os.getenv(api_key_key, 'n/a') or get_key(".env", api_key_key) - - test_config = { - "prompt": {"content": prompt}, - "strategies": strategies, - "provider": { - "provider_name": f"{config['provider_name']}/{config['model']}", - "model": f"{config['provider_name']}/{config['model']}", - "api_key": api_key, - }, - "temperature": self.default_config["temperature"], - "timeout": self.default_config["timeout"], - "max_tokens": self.default_config["max_tokens"], - "output_path": self.default_config["output_path"] - } + # if not api_key or api_key == 'n/a': + # print(f"Warning: API key for {base_provider_name.upper()} not found in environment variables or .env file.") + # Decide if this is a fatal error or if the runner handles it + + # test_run_config['provider']['api_key'] = api_key + # Ensure provider_name and model in the provider dict are correctly set for the runner + # The runner might expect provider_name to be like 'openai/gpt-4o' + # test_run_config['provider']['provider_name'] = f"{base_provider_name}/{test_run_config['provider']['model']}" + # This depends on what execute_prompt_tests_with_orchestrator expects for provider_name within the provider dict. + # Based on original code, it seems it expects 'provider_name': 'openai/gpt-4o', 'model': 'openai/gpt-4o' + # Let's ensure the 'provider' dict has the combined name for 'provider_name' and 'model' if not already structured that way. + # The ConfigManager.get_runner_config already structures it like: + # 'provider_name': 'openai', (at root) + # 'provider': {'provider_name': 'openai', 'model': 'openai', 'api_key': ...} + # The original UIConfigAdapter.run_test used: + # 'provider_name': f"{config['provider_name']}/{config['model']}" + # 'model': f"{config['provider_name']}/{config['model']}" + # Let's assume the stored runner_config has 'provider_name' (e.g. 'openai') at root, + # and 'provider': {'model': 'gpt-4o'} at least. + # We need to ensure the 'provider' dict passed to orchestrator is what it expects. + # The orchestrator likely uses the 'provider_name' from the root of test_run_config for LiteLLM. 
+ console = Console() - console.print(f"[bold cyan]Running test with config: {test_config}[/]") - + console.print(f"[bold cyan]Running test with profile ID '{config_id}':[/]") + console.print(f"[bold cyan]Effective config for run: {test_run_config}[/]") with Progress( SpinnerColumn(), @@ -76,16 +119,46 @@ def run_test(self, prompt: str, strategies: List[str], config: Dict[str, Any]) - TimeElapsedColumn(), ) as progress: task = progress.add_task("[cyan]Testing prompt security", total=None) - report_data = execute_prompt_tests_with_orchestrator(test_config) + # Pass the fully prepared test_run_config + report_data = execute_prompt_tests_with_orchestrator(test_run_config) progress.update(task, completed=True) console.print("[bold green]Tests completed successfully![/]") - console.print(f"[bold cyan]Report saved successfully at {report_data['report_metadata']['path']}[/]") + report_file_path = report_data.get('report_metadata', {}).get('path') + if report_file_path: + console.print(f"[bold cyan]Report saved successfully at {report_file_path}[/]") + # Add report to config's past_runs + model_config_store.add_report_to_config(config_id, str(report_file_path)) + else: + console.print("[bold yellow]Report path not found in report data. Cannot link to profile.[/]") console.print("\n") - # Execute the test with orchestrator return report_data + + # --- Profile Management Methods --- + + def save_profile(self, runner_config_data: Dict[str, Any], profile_name: Optional[str] = None) -> str: + """Saves a new profile or updates an existing one based on the presence of 'id' in runner_config_data. + Returns the ID of the saved/updated configuration. + """ + # model_config_store.save_config ensures 'id' is present and handles profile_name + model_config_store.save_config(runner_config_data, profile_name=profile_name) + return runner_config_data['id'] # 'id' is guaranteed by save_config + + def get_profile(self, config_id: str) -> Optional[Dict[str, Any]]: + """Retrieves a specific configuration profile by its ID.""" + return model_config_store.get_config(config_id) + + def list_profiles(self) -> List[Dict[str, Any]]: + """Lists all saved configuration profiles.""" + return model_config_store.list_configs() + + def delete_profile(self, config_id: str) -> bool: + """Deletes a configuration profile by its ID. Returns True if deleted.""" + return model_config_store.delete_config(config_id) + # --- Existing Methods for Default/Unsaved Config --- + def update_config(self, config: Dict[str, Any]) -> None: """ Update the default configuration. diff --git a/ui/dashboard.py b/ui/dashboard.py index e295c9a..6755664 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -145,6 +145,11 @@ def create_app_ui(): st.title("Compliant LLM UI") st.write("Test and analyze your AI prompts for security vulnerabilities") + adapter = UIConfigAdapter() + # Initialize session state for selected profile if not already present + if 'selected_profile_id' not in st.session_state: + st.session_state.selected_profile_id = None + # sidebar of main page with st.sidebar: if st.button("Open Documentation"): @@ -154,24 +159,52 @@ def create_app_ui(): except Exception as e: st.error(f"Error opening documentation: {str(e)}") - st.header("Test Reports") - reports = get_reports() - if not reports: - st.info("No reports found. 
Run a test to generate reports.") + st.sidebar.title("Model Profiles") + profiles = adapter.list_profiles() # Uses UIConfigAdapter.list_profiles() + + # Create a mapping from profile ID to display name (profile_name or truncated ID) + profile_options = {profile['id']: profile.get('profile_name', f"Profile {profile['id'][:8]}") for profile in profiles} + + if not profiles: + st.sidebar.info("No saved profiles found.") + # Ensure selected_profile_id is None if no profiles exist or selection is cleared + if st.session_state.selected_profile_id is not None: + st.session_state.selected_profile_id = None + # st.experimental_rerun() # Optional: rerun to clear main panel if a profile was deleted elsewhere else: - st.write("### Recent Reports") - for i, report in enumerate(reports): - formatted_time = report['timestamp'].strftime('%Y-%m-%d %H:%M:%S') - if st.button( - f"Report {i+1}. (Runtime: {report['runtime']}, Run at: {formatted_time})", - key=f"report_{report['name']}"): - selected_report_path = report['path'] - st.session_state['selected_report'] = selected_report_path - st.rerun() - - if 'selected_report' in st.session_state: - open_dashboard_with_report(st.session_state['selected_report']) - del st.session_state['selected_report'] + # Use a temporary variable for selectbox to detect change, then update session_state + # This helps manage reruns more explicitly if needed. + current_selection_in_selectbox = st.sidebar.selectbox( + "Select a Profile:", + options=list(profile_options.keys()), # Pass IDs as options + format_func=lambda id_key: profile_options[id_key], # Use the map for display names + index=None if not st.session_state.selected_profile_id or st.session_state.selected_profile_id not in profile_options else list(profile_options.keys()).index(st.session_state.selected_profile_id), + key="profile_selector_widget" # A unique key for the widget itself + ) + if st.session_state.selected_profile_id != current_selection_in_selectbox: + st.session_state.selected_profile_id = current_selection_in_selectbox + st.experimental_rerun() # Rerun to update the main panel with the new selection + + ## TODO: this code needs to be modified and moved to a model config page, where we + ## need to load the past test runs for the config. + # st.header("Test Reports") + # reports = get_reports() + # if not reports: + # st.info("No reports found. Run a test to generate reports.") + # else: + # st.write("### Recent Reports") + # for i, report in enumerate(reports): + # formatted_time = report['timestamp'].strftime('%Y-%m-%d %H:%M:%S') + # if st.button( + # f"Report {i+1}. 
(Runtime: {report['runtime']}, Run at: {formatted_time})", + # key=f"report_{report['name']}"): + # selected_report_path = report['path'] + # st.session_state['selected_report'] = selected_report_path + # st.rerun() + + # if 'selected_report' in st.session_state: + # open_dashboard_with_report(st.session_state['selected_report']) + # del st.session_state['selected_report'] # Form for entering keys From 195d943f1cd8a3d5aa3f91904d9c4a5644a20d2a Mon Sep 17 00:00:00 2001 From: Vini Katyal Date: Mon, 2 Jun 2025 14:39:05 +0530 Subject: [PATCH 03/12] chore: save and list profiles --- requirements.txt | 1 + ui/dashboard.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c8e60ac..9707d91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ aiofiles plotly psutil markdown +tinydb opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp diff --git a/ui/dashboard.py b/ui/dashboard.py index 6755664..f1ffb09 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -1,5 +1,7 @@ +import profile import sys import time +import uuid import psutil import subprocess from pathlib import Path @@ -269,7 +271,10 @@ def create_app_ui(): config[field] = val st.session_state['saved_config'] = config - st.write("Configuration saved successfully", config) + profile_name = provider_name + "_" + model + "_" + str(uuid.uuid4()) + adapter.save_profile(config, profile_name) + st.write("Profile saved successfully", profile_name) + st.write("Config saved successfully", config) # Form for running tests with st.expander("Run New Test", expanded=True): From 169dee43a587b5973c05e248b947514327709369 Mon Sep 17 00:00:00 2001 From: Vini Katyal Date: Mon, 2 Jun 2025 17:42:28 +0530 Subject: [PATCH 04/12] chore: List past runs , selected profile / config etc --- core/config_manager/ui_adapter.py | 40 +------ core/reporter.py | 3 +- core/runner.py | 2 +- ui/dashboard.py | 169 +++++++++++++++++++++--------- 4 files changed, 126 insertions(+), 88 deletions(-) diff --git a/core/config_manager/ui_adapter.py b/core/config_manager/ui_adapter.py index ba82865..f441dd2 100644 --- a/core/config_manager/ui_adapter.py +++ b/core/config_manager/ui_adapter.py @@ -52,7 +52,7 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate # Make a copy to avoid modifying the stored config directly with temporary overrides test_run_config = full_runner_config.copy() - test_run_config['provider'] = test_run_config.get('provider', {}).copy() # Ensure provider dict is also a copy + test_run_config['provider'] = test_run_config # Ensure provider dict is also a copy # Apply overrides if any if prompt_override is not None: @@ -60,11 +60,11 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate if 'prompt' not in test_run_config or not isinstance(test_run_config.get('prompt'), dict): test_run_config['prompt'] = {} test_run_config['prompt']['content'] = prompt_override - # If your runner_config stores prompt directly as a string: - # test_run_config['prompt'] = prompt_override if strategies_override is not None: test_run_config['strategies'] = strategies_override + + test_run_config['output_path'] = self.default_config['output_path'] # Ensure essential keys are present (they should be from the stored config) if 'prompt' not in test_run_config or (isinstance(test_run_config.get('prompt'), dict) and 'content' not in test_run_config.get('prompt', {})): @@ -76,39 +76,6 @@ def run_test(self, config_id: str, prompt_override: 
Optional[str] = None, strate if 'provider_name' not in test_run_config or 'provider' not in test_run_config or 'model' not in test_run_config['provider']: raise ValueError("Provider information (provider_name, model) is missing in the configuration.") - # To be deleted: API key will be loaded from tinydb - # API Key Handling - API key is NOT stored, fetched at runtime - # provider_details = test_run_config.get('provider', {}) - # The provider_name in runner_config might be just 'openai', not 'openai/gpt-4o' - # The model field usually contains the specific model like 'gpt-4o' - # The API key usually depends on the base provider (e.g., OPENAI_API_KEY) - # Let's assume 'provider_name' in the config root is the base provider e.g. 'openai' - # base_provider_name = test_run_config.get('provider_name', '').split('/')[0] - # api_key_env_var = f"{base_provider_name.upper()}_API_KEY" - # api_key = os.getenv(api_key_env_var) or get_key(".env", api_key_env_var) - - # if not api_key or api_key == 'n/a': - # print(f"Warning: API key for {base_provider_name.upper()} not found in environment variables or .env file.") - # Decide if this is a fatal error or if the runner handles it - - # test_run_config['provider']['api_key'] = api_key - # Ensure provider_name and model in the provider dict are correctly set for the runner - # The runner might expect provider_name to be like 'openai/gpt-4o' - # test_run_config['provider']['provider_name'] = f"{base_provider_name}/{test_run_config['provider']['model']}" - # This depends on what execute_prompt_tests_with_orchestrator expects for provider_name within the provider dict. - # Based on original code, it seems it expects 'provider_name': 'openai/gpt-4o', 'model': 'openai/gpt-4o' - # Let's ensure the 'provider' dict has the combined name for 'provider_name' and 'model' if not already structured that way. - # The ConfigManager.get_runner_config already structures it like: - # 'provider_name': 'openai', (at root) - # 'provider': {'provider_name': 'openai', 'model': 'openai', 'api_key': ...} - # The original UIConfigAdapter.run_test used: - # 'provider_name': f"{config['provider_name']}/{config['model']}" - # 'model': f"{config['provider_name']}/{config['model']}" - # Let's assume the stored runner_config has 'provider_name' (e.g. 'openai') at root, - # and 'provider': {'model': 'gpt-4o'} at least. - # We need to ensure the 'provider' dict passed to orchestrator is what it expects. - # The orchestrator likely uses the 'provider_name' from the root of test_run_config for LiteLLM. 
- console = Console() console.print(f"[bold cyan]Running test with profile ID '{config_id}':[/]") console.print(f"[bold cyan]Effective config for run: {test_run_config}[/]") @@ -125,6 +92,7 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate console.print("[bold green]Tests completed successfully![/]") report_file_path = report_data.get('report_metadata', {}).get('path') + print("Report Path::", report_file_path) if report_file_path: console.print(f"[bold cyan]Report saved successfully at {report_file_path}[/]") # Add report to config's past_runs diff --git a/core/reporter.py b/core/reporter.py index 4c0f018..f55fa87 100644 --- a/core/reporter.py +++ b/core/reporter.py @@ -4,7 +4,8 @@ from datetime import datetime -def save_report(report_data, output_path={"path": "reports", "filename": "report"}): + +def save_report(report_data, output_path): # Create directories if they don't exist timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") file_path = f"{output_path['path']}/{output_path['filename']}_{timestamp}.json" diff --git a/core/runner.py b/core/runner.py index 2f537c3..34bc18e 100644 --- a/core/runner.py +++ b/core/runner.py @@ -107,7 +107,7 @@ def execute_prompt_tests_with_orchestrator(config_dict): # Save report (optional) - output = config_dict.get('output_path') # Get from CLI argument + output = config_dict['output_path'] # Get from CLI argument save_report(report_data, output_path=output) return report_data diff --git a/ui/dashboard.py b/ui/dashboard.py index f1ffb09..f23bf86 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -164,9 +164,6 @@ def create_app_ui(): st.sidebar.title("Model Profiles") profiles = adapter.list_profiles() # Uses UIConfigAdapter.list_profiles() - # Create a mapping from profile ID to display name (profile_name or truncated ID) - profile_options = {profile['id']: profile.get('profile_name', f"Profile {profile['id'][:8]}") for profile in profiles} - if not profiles: st.sidebar.info("No saved profiles found.") # Ensure selected_profile_id is None if no profiles exist or selection is cleared @@ -174,52 +171,130 @@ def create_app_ui(): st.session_state.selected_profile_id = None # st.experimental_rerun() # Optional: rerun to clear main panel if a profile was deleted elsewhere else: - # Use a temporary variable for selectbox to detect change, then update session_state - # This helps manage reruns more explicitly if needed. 
- current_selection_in_selectbox = st.sidebar.selectbox( + # Initialize selection variables + current_selection = None + current_selection_in_selectbox = None + + # Use a temporary variable for selectbox to detect change + current_selection = st.sidebar.selectbox( "Select a Profile:", - options=list(profile_options.keys()), # Pass IDs as options - format_func=lambda id_key: profile_options[id_key], # Use the map for display names - index=None if not st.session_state.selected_profile_id or st.session_state.selected_profile_id not in profile_options else list(profile_options.keys()).index(st.session_state.selected_profile_id), - key="profile_selector_widget" # A unique key for the widget itself + options=profiles, # Pass profiles directly + format_func=lambda profile: profile.get('profile_name', f"Profile {profile['id'][:8]}") if profile else "No profiles available", + index=None if not st.session_state.selected_profile_id else next((i for i, p in enumerate(profiles) if p['id'] == st.session_state.selected_profile_id), None), + key="profile_selector_widget" ) - if st.session_state.selected_profile_id != current_selection_in_selectbox: - st.session_state.selected_profile_id = current_selection_in_selectbox - st.experimental_rerun() # Rerun to update the main panel with the new selection - - ## TODO: this code needs to be modified and moved to a model config page, where we - ## need to load the past test runs for the config. - # st.header("Test Reports") - # reports = get_reports() - # if not reports: - # st.info("No reports found. Run a test to generate reports.") - # else: - # st.write("### Recent Reports") - # for i, report in enumerate(reports): - # formatted_time = report['timestamp'].strftime('%Y-%m-%d %H:%M:%S') - # if st.button( - # f"Report {i+1}. (Runtime: {report['runtime']}, Run at: {formatted_time})", - # key=f"report_{report['name']}"): - # selected_report_path = report['path'] - # st.session_state['selected_report'] = selected_report_path - # st.rerun() - - # if 'selected_report' in st.session_state: - # open_dashboard_with_report(st.session_state['selected_report']) - # del st.session_state['selected_report'] + + # If a profile is selected, update session state with its ID + if current_selection: + current_selection_in_selectbox = current_selection['id'] + if st.session_state.selected_profile_id != current_selection_in_selectbox: + st.session_state.selected_profile_id = current_selection_in_selectbox + st.rerun() # Rerun to update the main panel with the new selection + + # Get selected profile + selected_profile = None + if st.session_state.selected_profile_id: + selected_profile = adapter.get_profile(st.session_state.selected_profile_id) + + # Get past runs for the selected profile + if selected_profile and 'past_runs' in selected_profile and isinstance(selected_profile['past_runs'], list): + if not selected_profile['past_runs']: + st.info("No test reports found for this profile. Run a test to generate reports.") + else: + st.write("### Recent Reports") + for i, report_path in enumerate(selected_profile['past_runs']): + try: + # Load report data + with open(report_path, 'r') as f: + report_data = json.load(f) + + # Format report info + formatted_time = datetime.fromtimestamp(os.path.getctime(report_path)).strftime('%Y-%m-%d %H:%M:%S') + + # Create report summary button + if st.button( + f"Report {i+1}. 
(Run at: {formatted_time})", + key=f"report_{report_path}"): + st.session_state.selected_report_path = report_path + st.rerun() + except Exception as e: + st.error(f"Error loading report: {str(e)}") + else: + st.info("Select a config or create a new config to view its test reports.") + + # If a report is selected, show its contents + if 'selected_report_path' in st.session_state: + try: + with open(st.session_state.selected_report_path, 'r') as f: + report_data = json.load(f) + + # Display report details + st.subheader("Report Details") + st.json(report_data) + + # Remove from session state after viewing + del st.session_state.selected_report_path + st.rerun() + except Exception as e: + st.error(f"Error viewing report: {str(e)}") - # Form for entering keys - with st.expander("Setup Configuration", expanded=True): - has_all_keys = False - if 'saved_config' not in st.session_state: - st.session_state['saved_config'] = {} + # Configuration Section + with st.expander("Setup New Configuration", expanded=not st.session_state.selected_profile_id): + # Load config from selected profile if available + if st.session_state.selected_profile_id: + selected_profile = adapter.get_profile(st.session_state.selected_profile_id) + if selected_profile: + st.session_state['saved_config'] = selected_profile + + # Display profile details in a nice card + with st.sidebar.container(): + st.markdown("### Selected Profile") + st.markdown(f"**Name:** {selected_profile.get('profile_name', 'Unnamed Profile')}") + st.markdown(f"**ID:** {selected_profile['id']}") + + # Display additional profile info if available + if 'provider' in selected_profile: + st.markdown(f"**Provider:** {selected_profile['provider']}") + if 'model' in selected_profile: + st.markdown(f"**Model:** {selected_profile['model']}") + + # Add a delete button + if st.button("Delete Profile", key="delete_profile_btn"): + adapter.delete_profile(selected_profile['id']) + st.session_state.selected_profile_id = None + st.rerun() + else: + if 'saved_config' not in st.session_state: + st.session_state['saved_config'] = {} # Select provider outside the form so it reruns on change provider_name = st.selectbox( "Select Provider", [p["name"] for p in PROVIDER_SETUP], index=len(PROVIDER_SETUP) - 1 ) + + # Form for creating new profile + if st.button("Create New Profile"): + with st.form("new_profile_form"): + new_profile_name = st.text_input("Profile Name", placeholder="Enter profile name") + if st.form_submit_button("Save Profile"): + if not new_profile_name: + st.error("Please enter a profile name") + return + + # Generate unique ID + profile_id = str(uuid.uuid4()) + + # Save config with profile name + config_to_save = st.session_state['saved_config'].copy() + config_to_save['profile_name'] = new_profile_name + adapter.save_profile(config_to_save, profile_id) + + # Update session state + st.session_state.selected_profile_id = profile_id + st.success(f"Profile '{new_profile_name}' created successfully!") + st.rerun() provider = next(p for p in PROVIDER_SETUP if p["name"] == provider_name) with st.form("provider_form", border=False): @@ -273,13 +348,12 @@ def create_app_ui(): st.session_state['saved_config'] = config profile_name = provider_name + "_" + model + "_" + str(uuid.uuid4()) adapter.save_profile(config, profile_name) - st.write("Profile saved successfully", profile_name) st.write("Config saved successfully", config) # Form for running tests with st.expander("Run New Test", expanded=True): submit_button_disabled = True - provider_config = 
st.session_state['saved_config'] + provider_config = selected_profile or st.session_state['saved_config'] if provider_config or has_all_keys: submit_button_disabled = False @@ -298,26 +372,21 @@ def create_app_ui(): st.stop() with st.spinner("🔍 Running tests..."): - stdout, stderr = run_test(prompt, selected_strategies, provider_config) + output = adapter.run_test(selected_profile['id'], prompt, selected_strategies) reports = get_reports() st.subheader("✅ Test Results") st.write("---") - if stdout: + if output: try: - json_output = json.loads(stdout) - render_beautiful_json_output(json_output) + render_beautiful_json_output(output) except json.JSONDecodeError: st.warning("âš ī¸ Output is not valid JSON. Showing raw output instead:") - st.code(stdout, language="text") + st.code(output, language="text") else: st.info("â„šī¸ No test output received.") - if stderr: - st.error("❌ Error Output:") - st.code(stderr, language="bash") - if reports: latest_report = reports[0] open_dashboard_with_report(latest_report["path"]) From 10622c5cf9ec9b71db53ef8fc098de357b7b6011 Mon Sep 17 00:00:00 2001 From: Vini Katyal Date: Mon, 2 Jun 2025 17:44:52 +0530 Subject: [PATCH 05/12] chore: fix build error --- core/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/runner.py b/core/runner.py index 34bc18e..2f537c3 100644 --- a/core/runner.py +++ b/core/runner.py @@ -107,7 +107,7 @@ def execute_prompt_tests_with_orchestrator(config_dict): # Save report (optional) - output = config_dict['output_path'] # Get from CLI argument + output = config_dict.get('output_path') # Get from CLI argument save_report(report_data, output_path=output) return report_data From fbc8a5490017bb846a9081bb533718f4f0be57f4 Mon Sep 17 00:00:00 2001 From: Vini Katyal Date: Mon, 2 Jun 2025 20:47:03 +0530 Subject: [PATCH 06/12] chore: remove print --- core/config_manager/ui_adapter.py | 1 - ui/dashboard.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/core/config_manager/ui_adapter.py b/core/config_manager/ui_adapter.py index f441dd2..2dc5f4d 100644 --- a/core/config_manager/ui_adapter.py +++ b/core/config_manager/ui_adapter.py @@ -92,7 +92,6 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate console.print("[bold green]Tests completed successfully![/]") report_file_path = report_data.get('report_metadata', {}).get('path') - print("Report Path::", report_file_path) if report_file_path: console.print(f"[bold cyan]Report saved successfully at {report_file_path}[/]") # Add report to config's past_runs diff --git a/ui/dashboard.py b/ui/dashboard.py index f23bf86..b596d94 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -372,7 +372,7 @@ def create_app_ui(): st.stop() with st.spinner("🔍 Running tests..."): - output = adapter.run_test(selected_profile['id'], prompt, selected_strategies) + output = adapter.run_test(provider_config["id"], prompt, selected_strategies) reports = get_reports() st.subheader("✅ Test Results") From 16a908c301772ba136b57172a9089526a785bf59 Mon Sep 17 00:00:00 2001 From: Vini Katyal Date: Mon, 2 Jun 2025 21:32:47 +0530 Subject: [PATCH 07/12] chore: fix re runs --- ui/dashboard.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/ui/dashboard.py b/ui/dashboard.py index b596d94..864aaca 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -205,40 +205,29 @@ def create_app_ui(): for i, report_path in enumerate(selected_profile['past_runs']): try: # Load report data + print("Rport Path", 
report_path) with open(report_path, 'r') as f: report_data = json.load(f) # Format report info formatted_time = datetime.fromtimestamp(os.path.getctime(report_path)).strftime('%Y-%m-%d %H:%M:%S') + + # Create report summary button if st.button( f"Report {i+1}. (Run at: {formatted_time})", key=f"report_{report_path}"): st.session_state.selected_report_path = report_path - st.rerun() + st.session_state.viewing_report = True except Exception as e: st.error(f"Error loading report: {str(e)}") + if 'selected_report_path' in st.session_state: + open_dashboard_with_report(st.session_state['selected_report_path']) + del st.session_state['selected_report_path'] else: st.info("Select a config or create a new config to view its test reports.") - # If a report is selected, show its contents - if 'selected_report_path' in st.session_state: - try: - with open(st.session_state.selected_report_path, 'r') as f: - report_data = json.load(f) - - # Display report details - st.subheader("Report Details") - st.json(report_data) - - # Remove from session state after viewing - del st.session_state.selected_report_path - st.rerun() - except Exception as e: - st.error(f"Error viewing report: {str(e)}") - - # Configuration Section with st.expander("Setup New Configuration", expanded=not st.session_state.selected_profile_id): # Load config from selected profile if available From a48541937ac59ae55b9c2202e65437496a179ad7 Mon Sep 17 00:00:00 2001 From: Vini Katyal Date: Mon, 2 Jun 2025 21:46:11 +0530 Subject: [PATCH 08/12] chore: allow reseting to defaults --- ui/dashboard.py | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/ui/dashboard.py b/ui/dashboard.py index 864aaca..24e534d 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -284,6 +284,7 @@ def create_app_ui(): st.session_state.selected_profile_id = profile_id st.success(f"Profile '{new_profile_name}' created successfully!") st.rerun() + provider = next(p for p in PROVIDER_SETUP if p["name"] == provider_name) with st.form("provider_form", border=False): @@ -346,11 +347,48 @@ def create_app_ui(): if provider_config or has_all_keys: submit_button_disabled = False + + # Initialize session state for form values if not already set + if 'test_prompt' not in st.session_state: + st.session_state.test_prompt = "" + if 'test_strategies' not in st.session_state: + st.session_state.test_strategies = ["prompt_injection", "jailbreak"] + with st.form("test_form", clear_on_submit=True, border=False): - prompt = st.text_area("Enter your prompt:", height=150, placeholder="Enter your system prompt here...") + prompt = st.text_area( + "Enter your prompt:", + height=150, + placeholder="Enter your system prompt here...", + value=st.session_state.test_prompt + ) st.write("### Select Testing Strategies") - selected_strategies = st.multiselect("Choose strategies to test", get_available_strategies(), default=["prompt_injection", "jailbreak"]) - submit_button = st.form_submit_button(label="Run Test", type="primary", disabled=submit_button_disabled) + selected_strategies = st.multiselect( + "Choose strategies to test", + get_available_strategies(), + default=st.session_state.test_strategies + ) + + # Create a horizontal layout for buttons + col1, col2 = st.columns([2, 1]) + with col1: + submit_button = st.form_submit_button(label="Run Test", type="primary", disabled=submit_button_disabled) + with col2: + if st.form_submit_button(label="Reset to Defaults", type="secondary"): + st.session_state.test_prompt = "" + 
st.session_state.test_strategies = ["prompt_injection", "jailbreak"] + st.rerun() + + if submit_button: + # Save form values to session state + st.session_state.test_prompt = prompt + st.session_state.test_strategies = selected_strategies + + if not prompt.strip(): + st.error("đŸšĢ Please enter a prompt!") + st.stop() + if not selected_strategies: + st.error("đŸšĢ Please select at least one testing strategy!") + st.stop() if submit_button: if not prompt.strip(): From bcea9708fb9a2ef905637bae025953fa193cd861 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Mon, 2 Jun 2025 15:57:34 -0700 Subject: [PATCH 09/12] fix provider name the provider name was incorrectly set to the model name after the config changes. updates this to take the provider_name/model_name as the provider param for litellm. fixes bug: when the llm response is `None`, a few attacks are failing due to unsafe handling of `None` value. removes excessive or repetitive logging --- core/compliance_mappings/nist/adapter.py | 3 ++- core/config_manager/ui_adapter.py | 7 +++---- core/evaluators/evals/advanced_evaluators.py | 6 ++++-- core/runner.py | 18 ++++++------------ ui/components/security_findings.py | 3 ++- ui/dashboard.py | 5 ++--- 6 files changed, 19 insertions(+), 23 deletions(-) diff --git a/core/compliance_mappings/nist/adapter.py b/core/compliance_mappings/nist/adapter.py index 9373ebe..1316554 100644 --- a/core/compliance_mappings/nist/adapter.py +++ b/core/compliance_mappings/nist/adapter.py @@ -66,7 +66,8 @@ def enrich_attack_result(self, attack_result: Dict[str, Any]) -> Dict[str, Any]: severity = attack_result.get("evaluation", {}).get("severity", "medium") mutation_technique = attack_result.get("mutation_technique", "") target_behavior = attack_result.get("target_behavior", strategy_name.lower().replace("_", " ")) - llm_response = attack_result.get("response", {}).get("response", "-") + response_data = attack_result.get("response", {}) + llm_response = response_data.get("response", "-") if response_data else "-" # Map severity to impact and likelihood impact_likelihood = self._mapper.map_severity_to_impact_likelihood(severity) diff --git a/core/config_manager/ui_adapter.py b/core/config_manager/ui_adapter.py index 2dc5f4d..eadadd8 100644 --- a/core/config_manager/ui_adapter.py +++ b/core/config_manager/ui_adapter.py @@ -52,7 +52,6 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate # Make a copy to avoid modifying the stored config directly with temporary overrides test_run_config = full_runner_config.copy() - test_run_config['provider'] = test_run_config # Ensure provider dict is also a copy # Apply overrides if any if prompt_override is not None: @@ -64,7 +63,7 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate if strategies_override is not None: test_run_config['strategies'] = strategies_override - test_run_config['output_path'] = self.default_config['output_path'] + test_run_config['output_path'] = test_run_config.get('output_path', self.default_config['output_path']) # if not specified in config, use default # Ensure essential keys are present (they should be from the stored config) if 'prompt' not in test_run_config or (isinstance(test_run_config.get('prompt'), dict) and 'content' not in test_run_config.get('prompt', {})): @@ -73,7 +72,7 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate if 'strategies' not in test_run_config or not test_run_config['strategies']: if strategies_override is None: # only raise 
if no override was given raise ValueError("Strategies are missing in the configuration and no override provided.") - if 'provider_name' not in test_run_config or 'provider' not in test_run_config or 'model' not in test_run_config['provider']: + if 'provider_name' not in test_run_config or 'model' not in test_run_config: raise ValueError("Provider information (provider_name, model) is missing in the configuration.") console = Console() @@ -104,7 +103,7 @@ def run_test(self, config_id: str, prompt_override: Optional[str] = None, strate # --- Profile Management Methods --- - def save_profile(self, runner_config_data: Dict[str, Any], profile_name: Optional[str] = None) -> str: + def upsert_profile(self, runner_config_data: Dict[str, Any], profile_name: Optional[str] = None) -> str: """Saves a new profile or updates an existing one based on the presence of 'id' in runner_config_data. Returns the ID of the saved/updated configuration. """ diff --git a/core/evaluators/evals/advanced_evaluators.py b/core/evaluators/evals/advanced_evaluators.py index 4cd345c..849a153 100644 --- a/core/evaluators/evals/advanced_evaluators.py +++ b/core/evaluators/evals/advanced_evaluators.py @@ -30,7 +30,8 @@ async def evaluate(self, Evaluation of system prompt compliance """ # Extract response content - response_text = llm_response.get('response', {}).get('choices', [{}])[0].get('message', {}).get('content', '') + response_data = llm_response.get('response', {}) or {} # set response_data to empty dict if response is not present + response_text = response_data.get('choices', [{}])[0].get('message', {}).get('content', '') # Analyze system prompt for key instructions system_keywords = self._extract_system_keywords(system_prompt) @@ -124,7 +125,8 @@ async def evaluate(self, Evaluation of response appropriateness """ # Extract response content - response_text = llm_response.get('response', {}).get('choices', [{}])[0].get('message', {}).get('content', '') + response_data = llm_response.get('response', {}) or {} # set response_data to empty dict if response is not present + response_text = response_data.get('choices', [{}])[0].get('message', {}).get('content', '') print("My user prompt", user_prompt) diff --git a/core/runner.py b/core/runner.py index 2f537c3..6094a02 100644 --- a/core/runner.py +++ b/core/runner.py @@ -39,18 +39,15 @@ def execute_prompt_tests_with_orchestrator(config_dict): config = config_dict # Extract provider configuration with sensible defaults # model_name = config.get('provider_name') or config.get('provider', {}).get('name') - provider_config = config.get('provider') or config.get('provider_name') - model_name = '' - if isinstance(provider_config, dict): - model_name = provider_config.get('model') - else: - model_name = provider_config + provider_name = config.get('provider_name') + model_name = config.get('model') # Get API key - api_key = provider_config.get('api_key') + api_key = config.get('api_key') # Create provider configuration in one step provider_config = { - 'provider_name': model_name, + # litellm accepts 'provider_name/model_name' as the provider + 'provider_name': f"{provider_name}/{model_name}", 'api_key': api_key, 'temperature': config.get('temperature', 0.7), 'timeout': config.get('timeout', 30) @@ -58,10 +55,7 @@ def execute_prompt_tests_with_orchestrator(config_dict): # Create provider provider = LiteLLMProvider() - - console = Console() - console.print(f"[bold cyan]Running test with config: {config}[/]") - console.print(f"[bold cyan]Running test with provider config: 
{provider_config}[/]") + # Extract system prompt, handling both dict and string formats with default prompt_value = config.get('prompt', {}) system_prompt = (prompt_value.get('content') if isinstance(prompt_value, dict) else prompt_value) or "You are a helpful assistant" diff --git a/ui/components/security_findings.py b/ui/components/security_findings.py index b99c994..0319856 100644 --- a/ui/components/security_findings.py +++ b/ui/components/security_findings.py @@ -16,6 +16,7 @@ def render_security_findings(report_data): for strategy in report_data.get('testcases', []): strategy_name = strategy['strategy'].replace('_', ' ').title() for test in strategy['results']: + response_data = test.get('response', {}) findings_data.append({ 'Strategy': strategy_name, 'Severity': test.get('severity', 'Moderate'), @@ -25,7 +26,7 @@ def render_security_findings(report_data): 'Description': test.get('description', 'No description'), 'System Prompt': test.get('system_prompt', 'N/A'), 'Attack Prompt': test.get('attack_prompt', 'N/A'), - 'Response': test.get('response', {}).get('response', 'N/A'), + 'Response': response_data.get('response', 'N/A') if response_data else 'N/A', 'Evaluation': test.get('evaluation', {}).get('reason', 'No evaluation') }) diff --git a/ui/dashboard.py b/ui/dashboard.py index 24e534d..6458de8 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -205,7 +205,6 @@ def create_app_ui(): for i, report_path in enumerate(selected_profile['past_runs']): try: # Load report data - print("Rport Path", report_path) with open(report_path, 'r') as f: report_data = json.load(f) @@ -278,7 +277,7 @@ def create_app_ui(): # Save config with profile name config_to_save = st.session_state['saved_config'].copy() config_to_save['profile_name'] = new_profile_name - adapter.save_profile(config_to_save, profile_id) + adapter.upsert_profile(config_to_save, profile_id) # Update session state st.session_state.selected_profile_id = profile_id @@ -337,7 +336,7 @@ def create_app_ui(): st.session_state['saved_config'] = config profile_name = provider_name + "_" + model + "_" + str(uuid.uuid4()) - adapter.save_profile(config, profile_name) + adapter.upsert_profile(config, profile_name) st.write("Config saved successfully", config) # Form for running tests From 962f243c2b3bb5d93242b5bb1f788f61fc9c1a06 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Mon, 2 Jun 2025 16:15:53 -0700 Subject: [PATCH 10/12] fix tests update requirements-lock.txt with the updated dependencies. skip the cli test: will remove docs for the test run using the cli. cli run is broken and won't be supported anymore. fixes the broken test runner. 
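for reference, the runner now consumes a flat config roughly shaped as below (a sketch only: the field names follow what core/runner.py reads after this series, and the values are illustrative placeholders, not part of the diff):

```python
from core.runner import execute_prompt_tests_with_orchestrator

# illustrative flat runner config (values are placeholders)
config = {
    "prompt": {"content": "You are a helpful assistant"},
    "provider_name": "openai",   # base provider; the runner passes "openai/gpt-4o" to LiteLLM
    "model": "gpt-4o",
    "api_key": "sk-...",         # placeholder
    "strategies": ["jailbreak", "prompt_injection"],
    "temperature": 0.7,
    "timeout": 30,
    "output_path": {"path": "reports", "filename": "report"},
}
report_data = execute_prompt_tests_with_orchestrator(config)
```
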
--- requirements-lock.txt | 79 +++++++++++++++++++++++++++++-------------- tests/test_cli.py | 2 ++ tests/test_runner.py | 3 +- 3 files changed, 58 insertions(+), 26 deletions(-) diff --git a/requirements-lock.txt b/requirements-lock.txt index 4133f6c..eda9ed5 100644 --- a/requirements-lock.txt +++ b/requirements-lock.txt @@ -5,58 +5,92 @@ aiosignal==1.3.2 altair==5.5.0 annotated-types==0.7.0 anyio==4.9.0 +asgiref==3.8.1 attrs==25.3.0 -black==25.1.0 +azure-core==1.34.0 +azure-core-tracing-opentelemetry==1.0.0b12 +azure-identity==1.23.0 +azure-monitor-opentelemetry==1.6.7 +azure-monitor-opentelemetry-exporter==1.0.0b36 blinker==1.9.0 cachetools==5.5.2 certifi==2025.4.26 +cffi==1.17.1 charset-normalizer==3.4.2 -click==8.2.0 +click==8.2.1 +-e file:///Users/kaushik/Code/fc/compliant-llm +cryptography==45.0.3 +deprecated==1.2.18 distro==1.9.0 filelock==3.18.0 -flake8==7.2.0 +fixedint==0.1.6 frozenlist==1.6.0 fsspec==2025.5.0 gitdb==4.0.12 gitpython==3.1.44 +googleapis-common-protos==1.70.0 +grpcio==1.71.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 huggingface-hub==0.31.4 idna==3.10 -importlib-metadata==8.7.0 +importlib-metadata==8.6.1 iniconfig==2.1.0 +isodate==0.7.2 jinja2==3.1.6 jiter==0.10.0 jsonschema==4.23.0 jsonschema-specifications==2025.4.1 -litellm==1.70.0 +litellm==1.70.2 +markdown==3.8 markdown-it-py==3.0.0 markupsafe==3.0.2 -mccabe==0.7.0 mdurl==0.1.2 +msal==1.32.3 +msal-extensions==1.3.1 +msrest==0.7.1 multidict==6.4.4 -mypy==1.15.0 -mypy-extensions==1.1.0 narwhals==1.40.0 numpy==2.2.6 +oauthlib==3.2.2 openai==1.75.0 +opentelemetry-api==1.33.1 +opentelemetry-exporter-otlp==1.33.1 +opentelemetry-exporter-otlp-proto-common==1.33.1 +opentelemetry-exporter-otlp-proto-grpc==1.33.1 +opentelemetry-exporter-otlp-proto-http==1.33.1 +opentelemetry-instrumentation==0.54b1 +opentelemetry-instrumentation-asgi==0.54b1 +opentelemetry-instrumentation-dbapi==0.54b1 +opentelemetry-instrumentation-django==0.54b1 +opentelemetry-instrumentation-fastapi==0.54b1 +opentelemetry-instrumentation-flask==0.54b1 +opentelemetry-instrumentation-psycopg2==0.54b1 +opentelemetry-instrumentation-requests==0.54b1 +opentelemetry-instrumentation-urllib==0.54b1 +opentelemetry-instrumentation-urllib3==0.54b1 +opentelemetry-instrumentation-wsgi==0.54b1 +opentelemetry-proto==1.33.1 +opentelemetry-resource-detector-azure==0.1.5 +opentelemetry-sdk==1.33.1 +opentelemetry-semantic-conventions==0.54b1 +opentelemetry-util-http==0.54b1 packaging==24.2 pandas==2.2.3 -pathspec==0.12.1 pillow==11.2.1 -platformdirs==4.3.8 +plotly==6.1.1 pluggy==1.6.0 --e file:///Users/kaushik/Code/fc/compliant-llm propcache==0.3.1 -protobuf==6.31.0 +protobuf==5.29.4 +psutil==6.1.1 pyarrow==20.0.0 -pycodestyle==2.13.0 +pycparser==2.22 pydantic==2.11.4 pydantic-core==2.33.2 pydeck==0.9.1 -pyflakes==3.3.2 pygments==2.19.1 +pyjwt==2.10.1 pytest==8.3.5 python-dateutil==2.9.0.post0 python-dotenv==1.1.0 @@ -65,29 +99,24 @@ pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 +requests-oauthlib==2.0.0 rich==14.0.0 -rpds-py==0.25.0 +rpds-py==0.25.1 six==1.17.0 smmap==5.0.2 sniffio==1.3.1 streamlit==1.45.1 tenacity==9.1.2 tiktoken==0.9.0 +tinydb==4.8.2 tokenizers==0.21.1 toml==0.10.2 -tornado==6.5 +tornado==6.5.1 tqdm==4.67.1 typing-extensions==4.13.2 -typing-inspection==0.4.0 +typing-inspection==0.4.1 tzdata==2025.2 urllib3==2.4.0 +wrapt==1.17.2 yarl==1.20.0 zipp==3.21.0 -opentelemetry-api -opentelemetry-sdk -opentelemetry-exporter-otlp -opentelemetry-instrumentation -azure-monitor-opentelemetry-exporter -azure-core -azure-identity 
-azure-monitor-opentelemetry \ No newline at end of file diff --git a/tests/test_cli.py b/tests/test_cli.py index 89e8b23..1704b4e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,8 +6,10 @@ import os import json import subprocess +import pytest +@pytest.mark.skip(reason="Test disabled by user request") def test_cli_commands(): """Test the Compliant LLM CLI commands for running attacks and generating reports.""" print("Starting Compliant LLM CLI test...") diff --git a/tests/test_runner.py b/tests/test_runner.py index d992b4c..b366037 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -222,7 +222,8 @@ def test_execute_prompt_tests_with_orchestrator(): # Build config config = { 'system_prompt': 'You are a helpful assistant', - 'provider': {'name': 'test-model'}, + 'provider_name': 'test', + 'model': 'test-model', 'strategies': ['jailbreak', 'prompt_injection'] } From 8ed547b99880425f779455061e061bae02052017 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Mon, 2 Jun 2025 16:23:04 -0700 Subject: [PATCH 11/12] add pytest-asyncio to pyproject.toml to fix build error on github actions --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 266e6c3..81f13ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "litellm", "python-dotenv", "pytest", + "pytest-asyncio", "aiofiles", "plotly", "psutil", From 398aa880405a890ba4df14d613b2a26a484e3962 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Mon, 2 Jun 2025 16:47:57 -0700 Subject: [PATCH 12/12] update README and docs --- README.md | 45 ++------------------- docs/getting_started.md | 70 +++----------------------------- docs/installation.md | 50 ++++------------------- docs/quickstart.md | 89 ----------------------------------------- 4 files changed, 17 insertions(+), 237 deletions(-) delete mode 100644 docs/quickstart.md diff --git a/README.md b/README.md index 46a9bc0..79453cf 100644 --- a/README.md +++ b/README.md @@ -35,54 +35,17 @@ Go through our [documentation](https://github.com/fiddlecube/compliant-llm/tree/ - ⚡ **End to End Testing**: Test your AI systems end to end - 📄 **Detailed Reporting**: Comprehensive reports with actionable insights -## âš™ī¸ Installation +## âš™ī¸ Install and Run ```bash +# install pip install compliant-llm -``` - -## Connect to your LLM - -Initialize the API key(s) and configuration to access the target LLM - -```bash -# for openai models: -export OPENAI_API_KEY=your-api-key-here - -# anthropic models: -export ANTHROPIC_API_KEY=your-api-key-here -# azure openai models: -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" -``` - -## 🚀 Quick Start - -1. You can use the compliant-llm dashboard: - -```bash +# run the dashboard compliant-llm dashboard ``` -2. Or use the CLI: - -```bash -compliant-llm test --prompt "You are a helpful assistant who can only respond ethically" --strategy "prompt_injection,jailbreak" --provider="azure/gpt-4o" -``` - -3. Or use a configuration file: - -```bash -compliant-llm test --config_path configs/config.yaml -``` - -3. 
View the latest test report in UI: - -```bash -compliant-llm dashboard -``` +Configure your LLM provider and run attacks ![](docs/gif/demo.gif) diff --git a/docs/getting_started.md b/docs/getting_started.md index 82aeedc..7e4a4f6 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -7,7 +7,11 @@ Compliant LLM is a tool designed to evaluate the robustness of AI system prompts ### Using pip ```bash +# install pip install compliant-llm + +# run the dashboard +compliant-llm dashboard ``` ### From source @@ -18,68 +22,4 @@ cd compliant-llm pip install -e . ``` -## Quick Start - -### Using the UI - -Start the compliant-llm UI dashboard using the command: - -```bash -compliant-llm dashboard -``` - -### Using the CLI - -#### Create a configuration file or use the default one: - -```yaml -prompt: | - Your system prompt here... - -provider_name: openai/gpt-4o -strategy: prompt_injection,adversarial -``` - -#### Connect to the LLM: - -##### For OpenAI models - -```bash -export OPENAI_API_KEY=your_api_key_here -``` - -##### For Anthropic models - -```bash -export ANTHROPIC_API_KEY=your_api_key_here -``` - -##### For Azure OpenAI models - -```bash -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" -``` - -#### Run the tool - -```bash -compliant-llm test --config configs/your_config.yaml -``` - -#### Or simply run the test with the following CLI arguments: - -```bash -compliant-llm test --prompt "Your system prompt here..." --strategy "prompt_injection,adversarial" --provider "openai/gpt-4o" -``` - -#### View the results: - -```bash -compliant-llm dashboard -``` - -## Next Steps - -- Learn about [configuration options](./configuration.md) +Use the dashboard to connect to your LLM provider and test your prompts. diff --git a/docs/installation.md b/docs/installation.md index fecfc2f..113bafb 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -27,60 +27,26 @@ For the latest development version or to contribute to the project, you can inst git clone https://github.com/fiddlecube/compliant-llm.git cd compliant-llm -# Activate the uv venv. -# Install uv if you don't have it -uv venv .venv -source .venv/bin/activate - # Install the package from source -uv pip install -e . - -# This installs the compliant-llm package in the current venv +pip install -e . ``` -### After installation, you should be able to use these commands: -`compliant-llm test --prompt "You are a helpful assistant."` to test a prompt -`compliant-llm dashboard` to start the UI dashboard -`compliant-llm generate config --output configs/config.yaml` to generate a config file -`compliant-llm config --list` to list all the config options - -## Verifying Installation - -To verify that Compliant LLM is correctly installed, run: +## Run the dashboard ```bash -compliant-llm --version +compliant-llm dashboard ``` -This should display the version number of Compliant LLM. - -## API Key Setup - -Compliant LLM requires API keys for the LLM providers you want to use. Set these as environment variables: +Connect to your LLM provider and run attacks against your prompts. 
-### OpenAI API Key - -```bash -export OPENAI_API_KEY=your-api-key-here -``` - -### Anthropic API Key - -```bash -export ANTHROPIC_API_KEY=your-anthropic-key -``` +## Verifying Installation -### Azure OpenAI API Key +To verify that Compliant LLM is correctly installed, run: ```bash -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" +compliant-llm --version ``` -When building from source, you can also create a `.env` file in your project root with these variables. - - #### Missing Dependencies If you encounter errors about missing dependencies, try installing with the full set of dependencies: @@ -108,4 +74,4 @@ If the tests run but fail to connect to the API: ## Next Steps -After installation, proceed to the [Quick Start Guide](quickstart.md) to begin using Compliant LLM. +After installation, proceed to the [Getting Started Guide](getting_started.md) to begin using Compliant LLM. diff --git a/docs/quickstart.md b/docs/quickstart.md deleted file mode 100644 index 60d3b58..0000000 --- a/docs/quickstart.md +++ /dev/null @@ -1,89 +0,0 @@ -# Quick Start Guide - -This guide will help you get up and running with Compliant LLM quickly. - -## Prerequisites - -- Python 9 or higher -- An API key for at least one of the supported LLM providers (OpenAI, Anthropic, Google) - -## Installation - -If you haven't installed Compliant LLM yet, follow the [installation instructions](installation.md). - -## Basic Usage - -### 1. Set up your API key - -```bash -# For OpenAI models (recommended for first-time users) -export OPENAI_API_KEY=your-api-key-here - -# For Anthropic models -export ANTHROPIC_API_KEY=your-anthropic-key - -# For Azure OpenAI models -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" -``` - -### 2. Run a simple test - -Test a basic system prompt against prompt injection attacks: - -```bash -compliant-llm test --prompt "You are a helpful assistant for a banking organization." -``` - -This will: - -- Test your prompt against the default prompt injection strategy -- Use the OpenAI GPT-4o model -- Save results - -### 3. View the test report on the UI dashboard - -```bash -compliant-llm dashboard -``` - -![Dashboard View](https://github.com/fiddlecube/compliant-llm/blob/main/docs/images/ui_screenshot.png) - -Here you will be able to see all your past test runs. - -Clicking them open will show you detailed reports, including: - -- Number of successful/failed tests -- Timestamp and settings used for the test -- Test results -- Test summary -- NIST compliance status -- Major risks and vulnerabilities identified - -You can also start a new test run using the UI. - -### 4. Run tests with multiple strategies - -```bash -compliant-llm test --prompt "You are a helpful assistant." \ - --strategy "prompt_injection,jailbreak,adversarial" -``` - -### 5. Create and use a configuration file - -For more complex testing scenarios, create a configuration file: - -```bash -compliant-llm generate config --output configs/my_config.yaml -``` - -Edit the generated file according to your needs, then run tests using this configuration: - -```bash -compliant-llm test --config configs/my_config.yaml -``` - -## Next Steps - -- Learn about [configuration options](configuration.md) for advanced testing scenarios