|
269 | 269 | " config_oid = source[\"configuration_oid\"]\n",
|
270 | 270 | " domain_oid = source[\"domain_oid\"]\n",
|
271 | 271 | "\n",
|
272 |
| - " all_rules = source[\"rules\"]\n", |
273 |
| - " all_url_filters = source[\"url_filters\"]\n", |
274 |
| - "\n", |
275 |
| - " # extract url filters\n", |
276 |
| - " url_filters = []\n", |
277 |
| - " if all_url_filters:\n", |
278 |
| - " url_filters = [\n", |
279 |
| - " {\n", |
280 |
| - " \"type\": all_url_filters[0][\"filter\"],\n", |
281 |
| - " \"pattern\": all_url_filters[0][\"pattern\"],\n", |
282 |
| - " }\n", |
283 |
| - " ]\n", |
284 |
| - "\n", |
285 |
| - " # extract rulesets\n", |
286 |
| - " action_translation_map = {\n", |
287 |
| - " \"fixed\": \"set\",\n", |
288 |
| - " \"extracted\": \"extract\",\n", |
289 |
| - " }\n", |
| 272 | + " # ensure the config and domain oids actually exist in our in-memory data structure\n", |
| 273 | + " if (\n", |
| 274 | + " config_oid in inflight_configuration_data\n", |
| 275 | + " and domain_oid in inflight_configuration_data[config_oid][\"domains_temp\"]\n", |
| 276 | + " ):\n", |
| 277 | + "\n", |
| 278 | + " # initialize extraction rulesets as an empty array if it doesn't exist yet\n", |
| 279 | + " if (\n", |
| 280 | + " not \"extraction_rulesets\"\n", |
| 281 | + " in inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid]\n", |
| 282 | + " ):\n", |
| 283 | + " inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n", |
| 284 | + " \"extraction_rulesets\"\n", |
| 285 | + " ] = []\n", |
| 286 | + "\n", |
| 287 | + " all_rules = source[\"rules\"]\n", |
| 288 | + " all_url_filters = source[\"url_filters\"]\n", |
| 289 | + "\n", |
| 290 | + " # extract url filters\n", |
| 291 | + " url_filters = []\n", |
| 292 | + " if all_url_filters:\n", |
| 293 | + " url_filters = [\n", |
| 294 | + " {\n", |
| 295 | + " \"type\": all_url_filters[0][\"filter\"],\n", |
| 296 | + " \"pattern\": all_url_filters[0][\"pattern\"],\n", |
| 297 | + " }\n", |
| 298 | + " ]\n", |
290 | 299 | "\n",
|
291 |
| - " ruleset = {}\n", |
292 |
| - " if all_rules:\n", |
293 |
| - " ruleset = [\n", |
294 |
| - " {\n", |
295 |
| - " \"action\": action_translation_map[\n", |
296 |
| - " all_rules[0][\"content_from\"][\"value_type\"]\n", |
297 |
| - " ],\n", |
298 |
| - " \"field_name\": all_rules[0][\"field_name\"],\n", |
299 |
| - " \"selector\": all_rules[0][\"selector\"],\n", |
300 |
| - " \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n", |
301 |
| - " \"value\": all_rules[0][\"content_from\"][\"value\"],\n", |
302 |
| - " \"source\": all_rules[0][\"source_type\"],\n", |
303 |
| - " }\n", |
304 |
| - " ]\n", |
| 300 | + " # extract rulesets\n", |
| 301 | + " action_translation_map = {\n", |
| 302 | + " \"fixed\": \"set\",\n", |
| 303 | + " \"extracted\": \"extract\",\n", |
| 304 | + " }\n", |
305 | 305 | "\n",
|
306 |
| - " # populate the in-memory data structure\n", |
307 |
| - " temp_extraction_rulesets = [\n", |
308 |
| - " {\n", |
| 306 | + " ruleset = []\n", |
| 307 | + " if all_rules:\n", |
| 308 | + " ruleset = [\n", |
| 309 | + " {\n", |
| 310 | + " \"action\": action_translation_map[\n", |
| 311 | + " all_rules[0][\"content_from\"][\"value_type\"]\n", |
| 312 | + " ],\n", |
| 313 | + " \"field_name\": all_rules[0][\"field_name\"],\n", |
| 314 | + " \"selector\": all_rules[0][\"selector\"],\n", |
| 315 | + " \"join_as\": all_rules[0][\"multiple_objects_handling\"],\n", |
| 316 | + " \"value\": all_rules[0][\"content_from\"][\"value\"],\n", |
| 317 | + " \"source\": all_rules[0][\"source_type\"],\n", |
| 318 | + " }\n", |
| 319 | + " ]\n", |
| 320 | + "\n", |
| 321 | + " temp_extraction_rulesets = {\n", |
309 | 322 | " \"url_filters\": url_filters,\n",
|
310 | 323 | " \"rules\": ruleset,\n",
|
311 | 324 | " }\n",
|
312 |
| - " ]\n", |
313 | 325 | "\n",
|
314 |
| - " print(\n", |
315 |
| - " f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n", |
316 |
| - " )\n", |
317 |
| - " extr_count += 1\n", |
318 |
| - " inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n", |
319 |
| - " \"extraction_rulesets\"\n", |
320 |
| - " ] = temp_extraction_rulesets" |
| 326 | + " print(\n", |
| 327 | + " f\"{extr_count}.) Crawler {config_oid} has extraction rules {temp_extraction_rulesets}\\n\"\n", |
| 328 | + " )\n", |
| 329 | + " extr_count += 1\n", |
| 330 | + "\n", |
| 331 | + " inflight_configuration_data[config_oid][\"domains_temp\"][domain_oid][\n", |
| 332 | + " \"extraction_rulesets\"\n", |
| 333 | + " ].append(temp_extraction_rulesets)" |
321 | 334 | ]
|
322 | 335 | },
|
323 | 336 | {
|
|
0 commit comments