@@ -1,68 +1,187 @@
 import json
-import re
 import uuid
 from typing import Tuple
 
 from typing_extensions import deprecated
 
-MITRE_X_CAPEC = (
-    "x_capec_*"  # https://github.com/mitre-attack/attack-stix-data/issues/34
+from pycti.utils.opencti_stix2_identifier import (
+    external_reference_generate_id,
+    kill_chain_phase_generate_id,
 )
-unsupported_ref_patterns = [MITRE_X_CAPEC]
+from pycti.utils.opencti_stix2_utils import (
+    STIX_CYBER_OBSERVABLE_MAPPING,
+    SUPPORTED_STIX_ENTITY_OBJECTS,
+)
+
+supported_types = (
+    SUPPORTED_STIX_ENTITY_OBJECTS  # entities
+    + list(STIX_CYBER_OBSERVABLE_MAPPING.keys())  # observables
+    + ["relationship", "sighting"]  # relationships
+)
+
+
+def is_id_supported(key):
+    id_type = key.split("--")[0]
+    return id_type in supported_types
 
 
 class OpenCTIStix2Splitter:
     def __init__(self):
         self.cache_index = {}
+        self.cache_refs = {}
         self.elements = []
-        self.unsupported_patterns = list(
-            map(lambda pattern: re.compile(pattern), unsupported_ref_patterns)
-        )
 
-    def is_ref_key_supported(self, key):
-        for pattern in self.unsupported_patterns:
-            if pattern.match(key):
-                return False
-        return True
-
-    def enlist_element(self, item_id, raw_data):
+    def enlist_element(
+        self, item_id, raw_data, cleanup_inconsistent_bundle, parent_acc
+    ):
         nb_deps = 1
         if item_id not in raw_data:
             return 0
+
         existing_item = self.cache_index.get(item_id)
         if existing_item is not None:
             return existing_item["nb_deps"]
-        # Recursive enlist for every refs
+
         item = raw_data[item_id]
+        if self.cache_refs.get(item_id) is None:
+            self.cache_refs[item_id] = []
         for key in list(item.keys()):
             value = item[key]
-            if key.endswith("_refs") and self.is_ref_key_supported(key):
+            # Recursive enlist for every refs
+            if key.endswith("_refs"):
                 to_keep = []
                 for element_ref in item[key]:
-                    if element_ref != item_id:
-                        nb_deps += self.enlist_element(element_ref, raw_data)
-                        to_keep.append(element_ref)
+                    # We need to check if this ref is not already a reference
+                    is_missing_ref = raw_data.get(element_ref) is None
+                    must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle
+                    not_dependency_ref = (
+                        self.cache_refs.get(element_ref) is None
+                        or item_id not in self.cache_refs[element_ref]
+                    )
+                    # Prevent any self reference
+                    if (
+                        is_id_supported(element_ref)
+                        and not must_be_cleaned
+                        and element_ref not in parent_acc
+                        and element_ref != item_id
+                        and not_dependency_ref
+                    ):
+                        self.cache_refs[item_id].append(element_ref)
+                        nb_deps += self.enlist_element(
+                            element_ref,
+                            raw_data,
+                            cleanup_inconsistent_bundle,
+                            parent_acc + [element_ref],
+                        )
+                        if element_ref not in to_keep:
+                            to_keep.append(element_ref)
                 item[key] = to_keep
-            elif key.endswith("_ref") and self.is_ref_key_supported(key):
-                if item[key] == item_id:
-                    item[key] = None
+            elif key.endswith("_ref"):
+                is_missing_ref = raw_data.get(value) is None
+                must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle
+                not_dependency_ref = (
+                    self.cache_refs.get(value) is None
+                    or item_id not in self.cache_refs[value]
+                )
+                # Prevent any self reference
+                if (
+                    value is not None
+                    and not must_be_cleaned
+                    and value not in parent_acc
+                    and is_id_supported(value)
+                    and value != item_id
+                    and not_dependency_ref
+                ):
+                    self.cache_refs[item_id].append(value)
+                    nb_deps += self.enlist_element(
+                        value,
+                        raw_data,
+                        cleanup_inconsistent_bundle,
+                        parent_acc + [value],
+                    )
                 else:
-                    # Need to handle the special case of recursive ref for created by ref
-                    is_created_by_ref = key == "created_by_ref"
-                    if is_created_by_ref:
-                        is_marking = item["id"].startswith("marking-definition--")
-                        if is_marking is False:
-                            nb_deps += self.enlist_element(value, raw_data)
-                    else:
-                        nb_deps += self.enlist_element(value, raw_data)
+                    item[key] = None
+            # Case for embedded elements (deduplicating and cleanup)
+            elif key == "external_references":
+                # specific case of splitting external references
+                # reference_ids = []
+                deduplicated_references = []
+                deduplicated_references_cache = {}
+                references = item[key]
+                for reference in references:
+                    reference_id = external_reference_generate_id(
+                        url=reference.get("url"),
+                        source_name=reference.get("source_name"),
+                        external_id=reference.get("external_id"),
+                    )
+                    if (
+                        reference_id is not None
+                        and deduplicated_references_cache.get(reference_id) is None
+                    ):
+                        deduplicated_references_cache[reference_id] = reference_id
+                        deduplicated_references.append(reference)
+                        # - Needed for a future move of splitting the elements
+                        # reference["id"] = reference_id
+                        # reference["type"] = "External-Reference"
+                        # raw_data[reference_id] = reference
+                        # if reference_id not in reference_ids:
+                        #     reference_ids.append(reference_id)
+                        # nb_deps += self.enlist_element(reference_id, raw_data)
+                item[key] = deduplicated_references
+            elif key == "kill_chain_phases":
+                # specific case of splitting kill_chain phases
+                # kill_chain_ids = []
+                deduplicated_kill_chain = []
+                deduplicated_kill_chain_cache = {}
+                kill_chains = item[key]
+                for kill_chain in kill_chains:
+                    kill_chain_id = kill_chain_phase_generate_id(
+                        kill_chain_name=kill_chain.get("kill_chain_name"),
+                        phase_name=kill_chain.get("phase_name"),
+                    )
+                    if (
+                        kill_chain_id is not None
+                        and deduplicated_kill_chain_cache.get(kill_chain_id) is None
+                    ):
+                        deduplicated_kill_chain_cache[kill_chain_id] = kill_chain_id
+                        deduplicated_kill_chain.append(kill_chain)
+                        # - Needed for a future move of splitting the elements
+                        # kill_chain["id"] = kill_chain_id
+                        # kill_chain["type"] = "Kill-Chain-Phase"
+                        # raw_data[kill_chain_id] = kill_chain
+                        # if kill_chain_id not in kill_chain_ids:
+                        #     kill_chain_ids.append(kill_chain_id)
+                        # nb_deps += self.enlist_element(kill_chain_id, raw_data)
+                item[key] = deduplicated_kill_chain
+
         # Get the final dep counting and add in cache
         item["nb_deps"] = nb_deps
-        self.elements.append(item)
-        self.cache_index[item_id] = item  # Put in cache
+        # Put in cache
+        if self.cache_index.get(item_id) is None:
+            # enlist only if compatible
+            if item["type"] == "relationship":
+                is_compatible = (
+                    item["source_ref"] is not None and item["target_ref"] is not None
+                )
+            elif item["type"] == "sighting":
+                is_compatible = (
+                    item["sighting_of_ref"] is not None
+                    and len(item["where_sighted_refs"]) > 0
+                )
+            else:
+                is_compatible = is_id_supported(item_id)
+            if is_compatible:
+                self.elements.append(item)
+                self.cache_index[item_id] = item
+
         return nb_deps
 
     def split_bundle_with_expectations(
-        self, bundle, use_json=True, event_version=None
+        self,
+        bundle,
+        use_json=True,
+        event_version=None,
+        cleanup_inconsistent_bundle=False,
     ) -> Tuple[int, list]:
         """splits a valid stix2 bundle into a list of bundles"""
         if use_json:
@@ -84,7 +203,7 @@ def split_bundle_with_expectations(
         for item in bundle_data["objects"]:
             raw_data[item["id"]] = item
         for item in bundle_data["objects"]:
-            self.enlist_element(item["id"], raw_data)
+            self.enlist_element(item["id"], raw_data, cleanup_inconsistent_bundle, [])
 
         # Build the bundles
         bundles = []
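
For context, a minimal usage sketch of the updated API (not part of this diff). It assumes `pycti` exposes `OpenCTIStix2Splitter` at the package root and that the `malware` type is listed in `SUPPORTED_STIX_ENTITY_OBJECTS`; the bundle and its IDs are made up for illustration.

```python
# Hypothetical usage sketch of the splitter with the new
# cleanup_inconsistent_bundle flag; IDs and object contents are invented.
import json

from pycti import OpenCTIStix2Splitter  # assumes pycti exports the class here

bundle = json.dumps(
    {
        "type": "bundle",
        "id": "bundle--a0a0a0a0-0000-4000-8000-000000000001",
        "objects": [
            {
                "type": "malware",
                "id": "malware--b1b1b1b1-0000-4000-8000-000000000002",
                "name": "Example malware",
                "is_family": False,
                # Dangling reference: the identity is absent from the bundle,
                # so with cleanup_inconsistent_bundle=True it should be
                # cleared instead of being kept as-is.
                "created_by_ref": "identity--c2c2c2c2-0000-4000-8000-000000000003",
            }
        ],
    }
)

splitter = OpenCTIStix2Splitter()
expectations, bundles = splitter.split_bundle_with_expectations(
    bundle,
    use_json=True,
    cleanup_inconsistent_bundle=True,
)
# expectations is the dependency count, bundles the list of split bundles
print(expectations, len(bundles))
```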