diff --git a/README.md b/README.md index 46a9bc0..79453cf 100644 --- a/README.md +++ b/README.md @@ -35,54 +35,17 @@ Go through our [documentation](https://github.com/fiddlecube/compliant-llm/tree/ - ⚡ **End to End Testing**: Test your AI systems end to end - 📄 **Detailed Reporting**: Comprehensive reports with actionable insights -## âš™ī¸ Installation +## âš™ī¸ Install and Run ```bash +# install pip install compliant-llm -``` - -## Connect to your LLM - -Initialize the API key(s) and configuration to access the target LLM - -```bash -# for openai models: -export OPENAI_API_KEY=your-api-key-here - -# anthropic models: -export ANTHROPIC_API_KEY=your-api-key-here -# azure openai models: -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" -``` - -## 🚀 Quick Start - -1. You can use the compliant-llm dashboard: - -```bash +# run the dashboard compliant-llm dashboard ``` -2. Or use the CLI: - -```bash -compliant-llm test --prompt "You are a helpful assistant who can only respond ethically" --strategy "prompt_injection,jailbreak" --provider="azure/gpt-4o" -``` - -3. Or use a configuration file: - -```bash -compliant-llm test --config_path configs/config.yaml -``` - -3. View the latest test report in UI: - -```bash -compliant-llm dashboard -``` +Configure your LLM provider and run attacks ![](docs/gif/demo.gif) diff --git a/core/compliance_mappings/nist/adapter.py b/core/compliance_mappings/nist/adapter.py index 9373ebe..1316554 100644 --- a/core/compliance_mappings/nist/adapter.py +++ b/core/compliance_mappings/nist/adapter.py @@ -66,7 +66,8 @@ def enrich_attack_result(self, attack_result: Dict[str, Any]) -> Dict[str, Any]: severity = attack_result.get("evaluation", {}).get("severity", "medium") mutation_technique = attack_result.get("mutation_technique", "") target_behavior = attack_result.get("target_behavior", strategy_name.lower().replace("_", " ")) - llm_response = attack_result.get("response", {}).get("response", "-") + response_data = attack_result.get("response", {}) + llm_response = response_data.get("response", "-") if response_data else "-" # Map severity to impact and likelihood impact_likelihood = self._mapper.map_severity_to_impact_likelihood(severity) diff --git a/core/config_manager/ui_adapter.py b/core/config_manager/ui_adapter.py index 7ecf3d2..eadadd8 100644 --- a/core/config_manager/ui_adapter.py +++ b/core/config_manager/ui_adapter.py @@ -6,6 +6,7 @@ from typing import Dict, List, Any, Optional from .config import ConfigManager, DEFAULT_REPORTS_DIR from core.runner import execute_prompt_tests_with_orchestrator +from core.data_store import model_config_store from rich.console import Console from rich.progress import ( Progress, SpinnerColumn, TextColumn, TimeElapsedColumn @@ -30,45 +31,53 @@ def __init__(self, config_manager: Optional[ConfigManager] = None): "output_path": {"path": str(DEFAULT_REPORTS_DIR), "filename": "report"}, # Default output path } - def run_test(self, prompt: str, strategies: List[str], config: Dict[str, Any]) -> Dict[str, Any]: + def run_test(self, config_id: str, prompt_override: Optional[str] = None, strategies_override: Optional[List[str]] = None) -> Dict[str, Any]: """ - Run tests with UI-specific configuration. - + Run tests using a stored configuration profile, with optional overrides. + Args: - prompt: The system prompt to test - strategies: List of test strategies to use - + config_id: The ID of the configuration profile to use. 
+ prompt_override: Optional new prompt content to use for this run. + strategies_override: Optional new list of strategies to use for this run. + Returns: - Dictionary containing test results - + Dictionary containing test results. + Raises: - ValueError: If required parameters are missing + ValueError: If the configuration profile is not found or required parameters are missing. """ - if not prompt: - raise ValueError("Prompt is required") - if not strategies: - raise ValueError("At least one strategy is required") + full_runner_config = self.get_profile(config_id) + if not full_runner_config: + raise ValueError(f"Configuration profile with ID '{config_id}' not found.") + + # Make a copy to avoid modifying the stored config directly with temporary overrides + test_run_config = full_runner_config.copy() + + # Apply overrides if any + if prompt_override is not None: + # Assuming prompt is stored as {'content': '...'} + if 'prompt' not in test_run_config or not isinstance(test_run_config.get('prompt'), dict): + test_run_config['prompt'] = {} + test_run_config['prompt']['content'] = prompt_override + + if strategies_override is not None: + test_run_config['strategies'] = strategies_override - # Create test configuration - api_key_key = f"{config['provider_name'].upper()}_API_KEY" - api_key = os.getenv(api_key_key, 'n/a') or get_key(".env", api_key_key) - - test_config = { - "prompt": {"content": prompt}, - "strategies": strategies, - "provider": { - "provider_name": f"{config['provider_name']}/{config['model']}", - "model": f"{config['provider_name']}/{config['model']}", - "api_key": api_key, - }, - "temperature": self.default_config["temperature"], - "timeout": self.default_config["timeout"], - "max_tokens": self.default_config["max_tokens"], - "output_path": self.default_config["output_path"] - } + test_run_config['output_path'] = test_run_config.get('output_path', self.default_config['output_path']) # if not specified in config, use default + + # Ensure essential keys are present (they should be from the stored config) + if 'prompt' not in test_run_config or (isinstance(test_run_config.get('prompt'), dict) and 'content' not in test_run_config.get('prompt', {})): + if prompt_override is None: # only raise if no override was given + raise ValueError("Prompt content is missing in the configuration and no override provided.") + if 'strategies' not in test_run_config or not test_run_config['strategies']: + if strategies_override is None: # only raise if no override was given + raise ValueError("Strategies are missing in the configuration and no override provided.") + if 'provider_name' not in test_run_config or 'model' not in test_run_config: + raise ValueError("Provider information (provider_name, model) is missing in the configuration.") + console = Console() - console.print(f"[bold cyan]Running test with config: {test_config}[/]") - + console.print(f"[bold cyan]Running test with profile ID '{config_id}':[/]") + console.print(f"[bold cyan]Effective config for run: {test_run_config}[/]") with Progress( SpinnerColumn(), @@ -76,16 +85,46 @@ def run_test(self, prompt: str, strategies: List[str], config: Dict[str, Any]) - TimeElapsedColumn(), ) as progress: task = progress.add_task("[cyan]Testing prompt security", total=None) - report_data = execute_prompt_tests_with_orchestrator(test_config) + # Pass the fully prepared test_run_config + report_data = execute_prompt_tests_with_orchestrator(test_run_config) progress.update(task, completed=True) console.print("[bold green]Tests completed 
successfully![/]") - console.print(f"[bold cyan]Report saved successfully at {report_data['report_metadata']['path']}[/]") + report_file_path = report_data.get('report_metadata', {}).get('path') + if report_file_path: + console.print(f"[bold cyan]Report saved successfully at {report_file_path}[/]") + # Add report to config's past_runs + model_config_store.add_report_to_config(config_id, str(report_file_path)) + else: + console.print("[bold yellow]Report path not found in report data. Cannot link to profile.[/]") console.print("\n") - # Execute the test with orchestrator return report_data + + # --- Profile Management Methods --- + + def upsert_profile(self, runner_config_data: Dict[str, Any], profile_name: Optional[str] = None) -> str: + """Saves a new profile or updates an existing one based on the presence of 'id' in runner_config_data. + Returns the ID of the saved/updated configuration. + """ + # model_config_store.save_config ensures 'id' is present and handles profile_name + model_config_store.save_config(runner_config_data, profile_name=profile_name) + return runner_config_data['id'] # 'id' is guaranteed by save_config + + def get_profile(self, config_id: str) -> Optional[Dict[str, Any]]: + """Retrieves a specific configuration profile by its ID.""" + return model_config_store.get_config(config_id) + + def list_profiles(self) -> List[Dict[str, Any]]: + """Lists all saved configuration profiles.""" + return model_config_store.list_configs() + + def delete_profile(self, config_id: str) -> bool: + """Deletes a configuration profile by its ID. Returns True if deleted.""" + return model_config_store.delete_config(config_id) + # --- Existing Methods for Default/Unsaved Config --- + def update_config(self, config: Dict[str, Any]) -> None: """ Update the default configuration. 
diff --git a/core/data_store/__init__.py b/core/data_store/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/data_store/model_config_store.py b/core/data_store/model_config_store.py new file mode 100644 index 0000000..f25f21b --- /dev/null +++ b/core/data_store/model_config_store.py @@ -0,0 +1,88 @@ +from pathlib import Path +from tinydb import TinyDB, Query +from typing import List, Dict, Any, Optional +import uuid + +DB_DIR = Path.home() / ".compliant-llm" +CONFIG_DB_FILE = DB_DIR / "model_config.json" + +# Ensure the .compliant-llm directory exists +DB_DIR.mkdir(parents=True, exist_ok=True) + +_db_instance = None + +def _get_table(): + global _db_instance + if _db_instance is None: + _db_instance = TinyDB(CONFIG_DB_FILE) + return _db_instance.table('model_config') + +def save_config(runner_config_data: Dict[str, Any], profile_name: str | None = None) -> None: + """Saves or updates a model configuration profile.""" + table = _get_table() + + # Ensure 'id' exists, add if not + if 'id' not in runner_config_data: + runner_config_data['id'] = str(uuid.uuid4()) + + # Ensure 'past_runs' exists if it's a new config or not present + if 'past_runs' not in runner_config_data: + runner_config_data['past_runs'] = [] + + # Add/update profile_name within the document for easier access if needed + if profile_name is not None: + runner_config_data['profile_name'] = profile_name + document_to_store = runner_config_data + + ConfigQuery = Query() + table.upsert(document_to_store, ConfigQuery.id == runner_config_data['id']) + print(f"Config '{profile_name}' saved.") + +def get_config(id: str) -> Optional[Dict[str, Any]]: + """Retrieves a specific model configuration profile.""" + table = _get_table() + ConfigQuery = Query() + return table.get(ConfigQuery.id == id) + +def list_configs() -> List[Dict[str, Any]]: + """Lists all saved model configuration profiles.""" + table = _get_table() + return table.all() + +def delete_config(id: str) -> bool: + """Deletes a model configuration profile. 
Returns True if deleted.""" + table = _get_table() + ConfigQuery = Query() + deleted_ids = table.remove(ConfigQuery.id == id) + return len(deleted_ids) > 0 + +def add_report_to_config(id: str, report_file_path: str) -> bool: + """Adds a report file path to the 'past_runs' list of a specific config.""" + table = _get_table() + ConfigQuery = Query() + config_doc = table.get(ConfigQuery.id == id) + + if not config_doc: + print(f"Error: Config profile '{id}' not found.") + return False + + # Ensure past_runs is a list + if 'past_runs' not in config_doc or not isinstance(config_doc['past_runs'], list): + config_doc['past_runs'] = [] + + # Avoid duplicate entries + if report_file_path not in config_doc['past_runs']: + config_doc['past_runs'].append(report_file_path) + table.upsert(config_doc, ConfigQuery.id == id) + print(f"Report '{report_file_path}' added to config '{id}'.") + return True + else: + print(f"Report '{report_file_path}' already exists in config '{id}'.") + return False + +def close_db(): + """Closes the database connection.""" + global _db_instance + if _db_instance: + _db_instance.close() + _db_instance = None diff --git a/core/evaluators/evals/advanced_evaluators.py b/core/evaluators/evals/advanced_evaluators.py index 4cd345c..849a153 100644 --- a/core/evaluators/evals/advanced_evaluators.py +++ b/core/evaluators/evals/advanced_evaluators.py @@ -30,7 +30,8 @@ async def evaluate(self, Evaluation of system prompt compliance """ # Extract response content - response_text = llm_response.get('response', {}).get('choices', [{}])[0].get('message', {}).get('content', '') + response_data = llm_response.get('response', {}) or {} # set response_data to empty dict if response is not present + response_text = response_data.get('choices', [{}])[0].get('message', {}).get('content', '') # Analyze system prompt for key instructions system_keywords = self._extract_system_keywords(system_prompt) @@ -124,7 +125,8 @@ async def evaluate(self, Evaluation of response appropriateness """ # Extract response content - response_text = llm_response.get('response', {}).get('choices', [{}])[0].get('message', {}).get('content', '') + response_data = llm_response.get('response', {}) or {} # set response_data to empty dict if response is not present + response_text = response_data.get('choices', [{}])[0].get('message', {}).get('content', '') print("My user prompt", user_prompt) diff --git a/core/reporter.py b/core/reporter.py index 4c0f018..f55fa87 100644 --- a/core/reporter.py +++ b/core/reporter.py @@ -4,7 +4,8 @@ from datetime import datetime -def save_report(report_data, output_path={"path": "reports", "filename": "report"}): + +def save_report(report_data, output_path): # Create directories if they don't exist timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") file_path = f"{output_path['path']}/{output_path['filename']}_{timestamp}.json" diff --git a/core/runner.py b/core/runner.py index 2f537c3..6094a02 100644 --- a/core/runner.py +++ b/core/runner.py @@ -39,18 +39,15 @@ def execute_prompt_tests_with_orchestrator(config_dict): config = config_dict # Extract provider configuration with sensible defaults # model_name = config.get('provider_name') or config.get('provider', {}).get('name') - provider_config = config.get('provider') or config.get('provider_name') - model_name = '' - if isinstance(provider_config, dict): - model_name = provider_config.get('model') - else: - model_name = provider_config + provider_name = config.get('provider_name') + model_name = config.get('model') # Get API key - api_key = 
provider_config.get('api_key') + api_key = config.get('api_key') # Create provider configuration in one step provider_config = { - 'provider_name': model_name, + # litellm accepts 'provider_name/model_name' as the provider + 'provider_name': f"{provider_name}/{model_name}", 'api_key': api_key, 'temperature': config.get('temperature', 0.7), 'timeout': config.get('timeout', 30) @@ -58,10 +55,7 @@ def execute_prompt_tests_with_orchestrator(config_dict): # Create provider provider = LiteLLMProvider() - - console = Console() - console.print(f"[bold cyan]Running test with config: {config}[/]") - console.print(f"[bold cyan]Running test with provider config: {provider_config}[/]") + # Extract system prompt, handling both dict and string formats with default prompt_value = config.get('prompt', {}) system_prompt = (prompt_value.get('content') if isinstance(prompt_value, dict) else prompt_value) or "You are a helpful assistant" diff --git a/docs/getting_started.md b/docs/getting_started.md index 82aeedc..7e4a4f6 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -7,7 +7,11 @@ Compliant LLM is a tool designed to evaluate the robustness of AI system prompts ### Using pip ```bash +# install pip install compliant-llm + +# run the dashboard +compliant-llm dashboard ``` ### From source @@ -18,68 +22,4 @@ cd compliant-llm pip install -e . ``` -## Quick Start - -### Using the UI - -Start the compliant-llm UI dashboard using the command: - -```bash -compliant-llm dashboard -``` - -### Using the CLI - -#### Create a configuration file or use the default one: - -```yaml -prompt: | - Your system prompt here... - -provider_name: openai/gpt-4o -strategy: prompt_injection,adversarial -``` - -#### Connect to the LLM: - -##### For OpenAI models - -```bash -export OPENAI_API_KEY=your_api_key_here -``` - -##### For Anthropic models - -```bash -export ANTHROPIC_API_KEY=your_api_key_here -``` - -##### For Azure OpenAI models - -```bash -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" -``` - -#### Run the tool - -```bash -compliant-llm test --config configs/your_config.yaml -``` - -#### Or simply run the test with the following CLI arguments: - -```bash -compliant-llm test --prompt "Your system prompt here..." --strategy "prompt_injection,adversarial" --provider "openai/gpt-4o" -``` - -#### View the results: - -```bash -compliant-llm dashboard -``` - -## Next Steps - -- Learn about [configuration options](./configuration.md) +Use the dashboard to connect to your LLM provider and test your prompts. diff --git a/docs/installation.md b/docs/installation.md index fecfc2f..113bafb 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -27,60 +27,26 @@ For the latest development version or to contribute to the project, you can inst git clone https://github.com/fiddlecube/compliant-llm.git cd compliant-llm -# Activate the uv venv. -# Install uv if you don't have it -uv venv .venv -source .venv/bin/activate - # Install the package from source -uv pip install -e . - -# This installs the compliant-llm package in the current venv +pip install -e . 
``` -### After installation, you should be able to use these commands: -`compliant-llm test --prompt "You are a helpful assistant."` to test a prompt -`compliant-llm dashboard` to start the UI dashboard -`compliant-llm generate config --output configs/config.yaml` to generate a config file -`compliant-llm config --list` to list all the config options - -## Verifying Installation - -To verify that Compliant LLM is correctly installed, run: +## Run the dashboard ```bash -compliant-llm --version +compliant-llm dashboard ``` -This should display the version number of Compliant LLM. - -## API Key Setup - -Compliant LLM requires API keys for the LLM providers you want to use. Set these as environment variables: +Connect to your LLM provider and run attacks against your prompts. -### OpenAI API Key - -```bash -export OPENAI_API_KEY=your-api-key-here -``` - -### Anthropic API Key - -```bash -export ANTHROPIC_API_KEY=your-anthropic-key -``` +## Verifying Installation -### Azure OpenAI API Key +To verify that Compliant LLM is correctly installed, run: ```bash -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" +compliant-llm --version ``` -When building from source, you can also create a `.env` file in your project root with these variables. - - #### Missing Dependencies If you encounter errors about missing dependencies, try installing with the full set of dependencies: @@ -108,4 +74,4 @@ If the tests run but fail to connect to the API: ## Next Steps -After installation, proceed to the [Quick Start Guide](quickstart.md) to begin using Compliant LLM. +After installation, proceed to the [Getting Started Guide](getting_started.md) to begin using Compliant LLM. diff --git a/docs/quickstart.md b/docs/quickstart.md deleted file mode 100644 index 60d3b58..0000000 --- a/docs/quickstart.md +++ /dev/null @@ -1,89 +0,0 @@ -# Quick Start Guide - -This guide will help you get up and running with Compliant LLM quickly. - -## Prerequisites - -- Python 9 or higher -- An API key for at least one of the supported LLM providers (OpenAI, Anthropic, Google) - -## Installation - -If you haven't installed Compliant LLM yet, follow the [installation instructions](installation.md). - -## Basic Usage - -### 1. Set up your API key - -```bash -# For OpenAI models (recommended for first-time users) -export OPENAI_API_KEY=your-api-key-here - -# For Anthropic models -export ANTHROPIC_API_KEY=your-anthropic-key - -# For Azure OpenAI models -export AZURE_API_KEY="my-azure-api-key" -export AZURE_API_BASE="https://example-endpoint.openai.azure.com" -export AZURE_API_VERSION="2023-05-15" -``` - -### 2. Run a simple test - -Test a basic system prompt against prompt injection attacks: - -```bash -compliant-llm test --prompt "You are a helpful assistant for a banking organization." -``` - -This will: - -- Test your prompt against the default prompt injection strategy -- Use the OpenAI GPT-4o model -- Save results - -### 3. View the test report on the UI dashboard - -```bash -compliant-llm dashboard -``` - -![Dashboard View](https://github.com/fiddlecube/compliant-llm/blob/main/docs/images/ui_screenshot.png) - -Here you will be able to see all your past test runs. 
- -Clicking them open will show you detailed reports, including: - -- Number of successful/failed tests -- Timestamp and settings used for the test -- Test results -- Test summary -- NIST compliance status -- Major risks and vulnerabilities identified - -You can also start a new test run using the UI. - -### 4. Run tests with multiple strategies - -```bash -compliant-llm test --prompt "You are a helpful assistant." \ - --strategy "prompt_injection,jailbreak,adversarial" -``` - -### 5. Create and use a configuration file - -For more complex testing scenarios, create a configuration file: - -```bash -compliant-llm generate config --output configs/my_config.yaml -``` - -Edit the generated file according to your needs, then run tests using this configuration: - -```bash -compliant-llm test --config configs/my_config.yaml -``` - -## Next Steps - -- Learn about [configuration options](configuration.md) for advanced testing scenarios diff --git a/pyproject.toml b/pyproject.toml index 30845dc..81f13ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,9 +20,11 @@ dependencies = [ "litellm", "python-dotenv", "pytest", + "pytest-asyncio", "aiofiles", "plotly", "psutil", + "tinydb", "markdown", "opentelemetry-api", "opentelemetry-sdk", diff --git a/requirements-lock.txt b/requirements-lock.txt index 4133f6c..eda9ed5 100644 --- a/requirements-lock.txt +++ b/requirements-lock.txt @@ -5,58 +5,92 @@ aiosignal==1.3.2 altair==5.5.0 annotated-types==0.7.0 anyio==4.9.0 +asgiref==3.8.1 attrs==25.3.0 -black==25.1.0 +azure-core==1.34.0 +azure-core-tracing-opentelemetry==1.0.0b12 +azure-identity==1.23.0 +azure-monitor-opentelemetry==1.6.7 +azure-monitor-opentelemetry-exporter==1.0.0b36 blinker==1.9.0 cachetools==5.5.2 certifi==2025.4.26 +cffi==1.17.1 charset-normalizer==3.4.2 -click==8.2.0 +click==8.2.1 +-e file:///Users/kaushik/Code/fc/compliant-llm +cryptography==45.0.3 +deprecated==1.2.18 distro==1.9.0 filelock==3.18.0 -flake8==7.2.0 +fixedint==0.1.6 frozenlist==1.6.0 fsspec==2025.5.0 gitdb==4.0.12 gitpython==3.1.44 +googleapis-common-protos==1.70.0 +grpcio==1.71.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 huggingface-hub==0.31.4 idna==3.10 -importlib-metadata==8.7.0 +importlib-metadata==8.6.1 iniconfig==2.1.0 +isodate==0.7.2 jinja2==3.1.6 jiter==0.10.0 jsonschema==4.23.0 jsonschema-specifications==2025.4.1 -litellm==1.70.0 +litellm==1.70.2 +markdown==3.8 markdown-it-py==3.0.0 markupsafe==3.0.2 -mccabe==0.7.0 mdurl==0.1.2 +msal==1.32.3 +msal-extensions==1.3.1 +msrest==0.7.1 multidict==6.4.4 -mypy==1.15.0 -mypy-extensions==1.1.0 narwhals==1.40.0 numpy==2.2.6 +oauthlib==3.2.2 openai==1.75.0 +opentelemetry-api==1.33.1 +opentelemetry-exporter-otlp==1.33.1 +opentelemetry-exporter-otlp-proto-common==1.33.1 +opentelemetry-exporter-otlp-proto-grpc==1.33.1 +opentelemetry-exporter-otlp-proto-http==1.33.1 +opentelemetry-instrumentation==0.54b1 +opentelemetry-instrumentation-asgi==0.54b1 +opentelemetry-instrumentation-dbapi==0.54b1 +opentelemetry-instrumentation-django==0.54b1 +opentelemetry-instrumentation-fastapi==0.54b1 +opentelemetry-instrumentation-flask==0.54b1 +opentelemetry-instrumentation-psycopg2==0.54b1 +opentelemetry-instrumentation-requests==0.54b1 +opentelemetry-instrumentation-urllib==0.54b1 +opentelemetry-instrumentation-urllib3==0.54b1 +opentelemetry-instrumentation-wsgi==0.54b1 +opentelemetry-proto==1.33.1 +opentelemetry-resource-detector-azure==0.1.5 +opentelemetry-sdk==1.33.1 +opentelemetry-semantic-conventions==0.54b1 +opentelemetry-util-http==0.54b1 packaging==24.2 pandas==2.2.3 
-pathspec==0.12.1 pillow==11.2.1 -platformdirs==4.3.8 +plotly==6.1.1 pluggy==1.6.0 --e file:///Users/kaushik/Code/fc/compliant-llm propcache==0.3.1 -protobuf==6.31.0 +protobuf==5.29.4 +psutil==6.1.1 pyarrow==20.0.0 -pycodestyle==2.13.0 +pycparser==2.22 pydantic==2.11.4 pydantic-core==2.33.2 pydeck==0.9.1 -pyflakes==3.3.2 pygments==2.19.1 +pyjwt==2.10.1 pytest==8.3.5 python-dateutil==2.9.0.post0 python-dotenv==1.1.0 @@ -65,29 +99,24 @@ pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 +requests-oauthlib==2.0.0 rich==14.0.0 -rpds-py==0.25.0 +rpds-py==0.25.1 six==1.17.0 smmap==5.0.2 sniffio==1.3.1 streamlit==1.45.1 tenacity==9.1.2 tiktoken==0.9.0 +tinydb==4.8.2 tokenizers==0.21.1 toml==0.10.2 -tornado==6.5 +tornado==6.5.1 tqdm==4.67.1 typing-extensions==4.13.2 -typing-inspection==0.4.0 +typing-inspection==0.4.1 tzdata==2025.2 urllib3==2.4.0 +wrapt==1.17.2 yarl==1.20.0 zipp==3.21.0 -opentelemetry-api -opentelemetry-sdk -opentelemetry-exporter-otlp -opentelemetry-instrumentation -azure-monitor-opentelemetry-exporter -azure-core -azure-identity -azure-monitor-opentelemetry \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c8e60ac..9707d91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ aiofiles plotly psutil markdown +tinydb opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp diff --git a/tests/test_cli.py b/tests/test_cli.py index 89e8b23..1704b4e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,8 +6,10 @@ import os import json import subprocess +import pytest +@pytest.mark.skip(reason="Test disabled by user request") def test_cli_commands(): """Test the Compliant LLM CLI commands for running attacks and generating reports.""" print("Starting Compliant LLM CLI test...") diff --git a/tests/test_runner.py b/tests/test_runner.py index d992b4c..b366037 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -222,7 +222,8 @@ def test_execute_prompt_tests_with_orchestrator(): # Build config config = { 'system_prompt': 'You are a helpful assistant', - 'provider': {'name': 'test-model'}, + 'provider_name': 'test', + 'model': 'test-model', 'strategies': ['jailbreak', 'prompt_injection'] } diff --git a/ui/components/security_findings.py b/ui/components/security_findings.py index b99c994..0319856 100644 --- a/ui/components/security_findings.py +++ b/ui/components/security_findings.py @@ -16,6 +16,7 @@ def render_security_findings(report_data): for strategy in report_data.get('testcases', []): strategy_name = strategy['strategy'].replace('_', ' ').title() for test in strategy['results']: + response_data = test.get('response', {}) findings_data.append({ 'Strategy': strategy_name, 'Severity': test.get('severity', 'Moderate'), @@ -25,7 +26,7 @@ def render_security_findings(report_data): 'Description': test.get('description', 'No description'), 'System Prompt': test.get('system_prompt', 'N/A'), 'Attack Prompt': test.get('attack_prompt', 'N/A'), - 'Response': test.get('response', {}).get('response', 'N/A'), + 'Response': response_data.get('response', 'N/A') if response_data else 'N/A', 'Evaluation': test.get('evaluation', {}).get('reason', 'No evaluation') }) diff --git a/ui/dashboard.py b/ui/dashboard.py index e295c9a..6458de8 100644 --- a/ui/dashboard.py +++ b/ui/dashboard.py @@ -1,5 +1,7 @@ +import profile import sys import time +import uuid import psutil import subprocess from pathlib import Path @@ -145,6 +147,11 @@ def create_app_ui(): st.title("Compliant LLM UI") st.write("Test and analyze your AI 
prompts for security vulnerabilities") + adapter = UIConfigAdapter() + # Initialize session state for selected profile if not already present + if 'selected_profile_id' not in st.session_state: + st.session_state.selected_profile_id = None + # sidebar of main page with st.sidebar: if st.button("Open Documentation"): @@ -154,37 +161,129 @@ def create_app_ui(): except Exception as e: st.error(f"Error opening documentation: {str(e)}") - st.header("Test Reports") - reports = get_reports() - if not reports: - st.info("No reports found. Run a test to generate reports.") + st.sidebar.title("Model Profiles") + profiles = adapter.list_profiles() # Uses UIConfigAdapter.list_profiles() + + if not profiles: + st.sidebar.info("No saved profiles found.") + # Ensure selected_profile_id is None if no profiles exist or selection is cleared + if st.session_state.selected_profile_id is not None: + st.session_state.selected_profile_id = None + # st.experimental_rerun() # Optional: rerun to clear main panel if a profile was deleted elsewhere + else: + # Initialize selection variables + current_selection = None + current_selection_in_selectbox = None + + # Use a temporary variable for selectbox to detect change + current_selection = st.sidebar.selectbox( + "Select a Profile:", + options=profiles, # Pass profiles directly + format_func=lambda profile: profile.get('profile_name', f"Profile {profile['id'][:8]}") if profile else "No profiles available", + index=None if not st.session_state.selected_profile_id else next((i for i, p in enumerate(profiles) if p['id'] == st.session_state.selected_profile_id), None), + key="profile_selector_widget" + ) + + # If a profile is selected, update session state with its ID + if current_selection: + current_selection_in_selectbox = current_selection['id'] + if st.session_state.selected_profile_id != current_selection_in_selectbox: + st.session_state.selected_profile_id = current_selection_in_selectbox + st.rerun() # Rerun to update the main panel with the new selection + + # Get selected profile + selected_profile = None + if st.session_state.selected_profile_id: + selected_profile = adapter.get_profile(st.session_state.selected_profile_id) + + # Get past runs for the selected profile + if selected_profile and 'past_runs' in selected_profile and isinstance(selected_profile['past_runs'], list): + if not selected_profile['past_runs']: + st.info("No test reports found for this profile. Run a test to generate reports.") else: st.write("### Recent Reports") - for i, report in enumerate(reports): - formatted_time = report['timestamp'].strftime('%Y-%m-%d %H:%M:%S') - if st.button( - f"Report {i+1}. 
(Runtime: {report['runtime']}, Run at: {formatted_time})", - key=f"report_{report['name']}"): - selected_report_path = report['path'] - st.session_state['selected_report'] = selected_report_path - st.rerun() + for i, report_path in enumerate(selected_profile['past_runs']): + try: + # Load report data + with open(report_path, 'r') as f: + report_data = json.load(f) + + # Format report info + formatted_time = datetime.fromtimestamp(os.path.getctime(report_path)).strftime('%Y-%m-%d %H:%M:%S') - if 'selected_report' in st.session_state: - open_dashboard_with_report(st.session_state['selected_report']) - del st.session_state['selected_report'] - - # Form for entering keys - with st.expander("Setup Configuration", expanded=True): - has_all_keys = False - if 'saved_config' not in st.session_state: - st.session_state['saved_config'] = {} + + # Create report summary button + if st.button( + f"Report {i+1}. (Run at: {formatted_time})", + key=f"report_{report_path}"): + st.session_state.selected_report_path = report_path + st.session_state.viewing_report = True + except Exception as e: + st.error(f"Error loading report: {str(e)}") + if 'selected_report_path' in st.session_state: + open_dashboard_with_report(st.session_state['selected_report_path']) + del st.session_state['selected_report_path'] + else: + st.info("Select a config or create a new config to view its test reports.") + + # Configuration Section + with st.expander("Setup New Configuration", expanded=not st.session_state.selected_profile_id): + # Load config from selected profile if available + if st.session_state.selected_profile_id: + selected_profile = adapter.get_profile(st.session_state.selected_profile_id) + if selected_profile: + st.session_state['saved_config'] = selected_profile + + # Display profile details in a nice card + with st.sidebar.container(): + st.markdown("### Selected Profile") + st.markdown(f"**Name:** {selected_profile.get('profile_name', 'Unnamed Profile')}") + st.markdown(f"**ID:** {selected_profile['id']}") + + # Display additional profile info if available + if 'provider' in selected_profile: + st.markdown(f"**Provider:** {selected_profile['provider']}") + if 'model' in selected_profile: + st.markdown(f"**Model:** {selected_profile['model']}") + + # Add a delete button + if st.button("Delete Profile", key="delete_profile_btn"): + adapter.delete_profile(selected_profile['id']) + st.session_state.selected_profile_id = None + st.rerun() + else: + if 'saved_config' not in st.session_state: + st.session_state['saved_config'] = {} # Select provider outside the form so it reruns on change provider_name = st.selectbox( "Select Provider", [p["name"] for p in PROVIDER_SETUP], index=len(PROVIDER_SETUP) - 1 ) + + # Form for creating new profile + if st.button("Create New Profile"): + with st.form("new_profile_form"): + new_profile_name = st.text_input("Profile Name", placeholder="Enter profile name") + if st.form_submit_button("Save Profile"): + if not new_profile_name: + st.error("Please enter a profile name") + return + + # Generate unique ID + profile_id = str(uuid.uuid4()) + + # Save config with profile name + config_to_save = st.session_state['saved_config'].copy() + config_to_save['profile_name'] = new_profile_name + adapter.upsert_profile(config_to_save, profile_id) + + # Update session state + st.session_state.selected_profile_id = profile_id + st.success(f"Profile '{new_profile_name}' created successfully!") + st.rerun() + provider = next(p for p in PROVIDER_SETUP if p["name"] == provider_name) with 
st.form("provider_form", border=False): @@ -236,20 +335,59 @@ def create_app_ui(): config[field] = val st.session_state['saved_config'] = config - st.write("Configuration saved successfully", config) + profile_name = provider_name + "_" + model + "_" + str(uuid.uuid4()) + adapter.upsert_profile(config, profile_name) + st.write("Config saved successfully", config) # Form for running tests with st.expander("Run New Test", expanded=True): submit_button_disabled = True - provider_config = st.session_state['saved_config'] + provider_config = selected_profile or st.session_state['saved_config'] if provider_config or has_all_keys: submit_button_disabled = False + + # Initialize session state for form values if not already set + if 'test_prompt' not in st.session_state: + st.session_state.test_prompt = "" + if 'test_strategies' not in st.session_state: + st.session_state.test_strategies = ["prompt_injection", "jailbreak"] + with st.form("test_form", clear_on_submit=True, border=False): - prompt = st.text_area("Enter your prompt:", height=150, placeholder="Enter your system prompt here...") + prompt = st.text_area( + "Enter your prompt:", + height=150, + placeholder="Enter your system prompt here...", + value=st.session_state.test_prompt + ) st.write("### Select Testing Strategies") - selected_strategies = st.multiselect("Choose strategies to test", get_available_strategies(), default=["prompt_injection", "jailbreak"]) - submit_button = st.form_submit_button(label="Run Test", type="primary", disabled=submit_button_disabled) + selected_strategies = st.multiselect( + "Choose strategies to test", + get_available_strategies(), + default=st.session_state.test_strategies + ) + + # Create a horizontal layout for buttons + col1, col2 = st.columns([2, 1]) + with col1: + submit_button = st.form_submit_button(label="Run Test", type="primary", disabled=submit_button_disabled) + with col2: + if st.form_submit_button(label="Reset to Defaults", type="secondary"): + st.session_state.test_prompt = "" + st.session_state.test_strategies = ["prompt_injection", "jailbreak"] + st.rerun() + + if submit_button: + # Save form values to session state + st.session_state.test_prompt = prompt + st.session_state.test_strategies = selected_strategies + + if not prompt.strip(): + st.error("đŸšĢ Please enter a prompt!") + st.stop() + if not selected_strategies: + st.error("đŸšĢ Please select at least one testing strategy!") + st.stop() if submit_button: if not prompt.strip(): @@ -260,26 +398,21 @@ def create_app_ui(): st.stop() with st.spinner("🔍 Running tests..."): - stdout, stderr = run_test(prompt, selected_strategies, provider_config) + output = adapter.run_test(provider_config["id"], prompt, selected_strategies) reports = get_reports() st.subheader("✅ Test Results") st.write("---") - if stdout: + if output: try: - json_output = json.loads(stdout) - render_beautiful_json_output(json_output) + render_beautiful_json_output(output) except json.JSONDecodeError: st.warning("âš ī¸ Output is not valid JSON. Showing raw output instead:") - st.code(stdout, language="text") + st.code(output, language="text") else: st.info("â„šī¸ No test output received.") - if stderr: - st.error("❌ Error Output:") - st.code(stderr, language="bash") - if reports: latest_report = reports[0] open_dashboard_with_report(latest_report["path"])
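
For completeness, a short sketch of the TinyDB-backed store added in `core/data_store/model_config_store.py`, exercised directly through its module-level helpers (the sample config is a placeholder, and the report path merely mimics the `reporter.py` naming scheme):

```python
# Sketch only: calls the helpers introduced in this diff; the config values
# and the report path are illustrative placeholders.
from core.data_store import model_config_store

cfg = {
    "provider_name": "openai",
    "model": "gpt-4o",
    "prompt": {"content": "You are a helpful assistant"},
    "strategies": ["prompt_injection"],
}

model_config_store.save_config(cfg, profile_name="demo")  # assigns cfg["id"] and past_runs=[]
stored = model_config_store.get_config(cfg["id"])

# Link a finished run to the profile (duplicate paths are ignored).
model_config_store.add_report_to_config(cfg["id"], "reports/report_20250101_000000.json")

print([c.get("profile_name") for c in model_config_store.list_configs()])
model_config_store.delete_config(cfg["id"])
model_config_store.close_db()
```
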