"""Template interpolation helpers with support for nested accessors.

``interpolate_only`` replaces ``{name}`` style placeholders with values taken
from an inputs mapping.  A placeholder may carry a chain of accessors such as
``{name.attr}``, ``{name[0]}`` or ``{name['key']}``.  Braces that do not start
with an identifier (for example JSON objects) are left untouched.
"""

import re
from typing import Any, Dict, Optional, Union

# Regex to parse ONE accessor at the start of a chain: .attribute, [0],
# ['key'] or ["key"].  The number of capture groups is the same whichever
# alternative matches:
#   Group 1: full dot accessor (e.g. `.attr`)
#   Group 2: attribute name (e.g. `attr`)
#   Group 3: full bracket accessor (e.g. `[0]`, `['key']`)
#   Group 4: content inside the brackets, quotes included
#   Group 5: integer index (e.g. `0`)
#   Group 6: single-quoted key content, without the quotes
#   Group 7: double-quoted key content, without the quotes
_ACCESSOR_PATTERN = re.compile(
    # Option 1: dot accessor
    r"(\.\s*([A-Za-z_][A-Za-z0-9_]*))"  # Groups 1 (full), 2 (name)
    # Option 2: bracket accessor
    r"|(\[\s*("  # Groups 3 (full), 4 (content w/ quotes)
    # Content type 1: integer index
    r"([0-9]+)"  # Group 5 (index)
    # Content type 2: single-quoted key
    r"|'((?:[^'\\]|\\.)*)'"  # Group 6 (key content)
    # Content type 3: double-quoted key
    r"|\"((?:[^\"\\]|\\.)*)\""  # Group 7 (key content)
    r")\s*\])"
)

# Regex to find placeholders like {var}, {var.attr}, {var[0]}, {var['key']}.
#   Group 1: base variable name (e.g. "var")
#   Group 2: raw accessor chain (e.g. ".attr[0]['key']"), may include whitespace
# The accessor chain must start (after optional whitespace) with `.` or `[` and
# runs up to the first `}`.  A newline is only accepted inside the chain when
# the next non-whitespace character starts another accessor.  Every branch of
# the inner alternation begins with a different character, so the pattern
# cannot backtrack exponentially on input such as "{a......" with no closing
# brace (a nested lazy `(?:... .*?)*?` formulation does).
_PLACEHOLDER_PATTERN = re.compile(
    r"\{"
    r"([A-Za-z_][A-Za-z0-9_]*)"  # 1: base variable name
    r"((?:\s*[.\[](?:[^\n}]|\n\s*[.\[])*)?)"  # 2: accessor chain
    r"\s*\}"  # allow whitespace before the closing brace
)

# One backslash escape sequence inside a quoted key: octal, \xXX, \uXXXX,
# \UXXXXXXXX, or a backslash followed by any single character.
_KEY_ESCAPE_PATTERN = re.compile(
    r"\\(?:[0-7]{1,3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}|.)",
    re.DOTALL,
)


def _decode_key_escape(match: "re.Match[str]") -> str:
    """Decode a single backslash escape sequence matched in a quoted key."""
    sequence = match.group(0)
    try:
        encoded = sequence.encode("ascii")
    except UnicodeEncodeError:
        # Backslash followed by a non-ASCII character: not an escape, keep it.
        return sequence
    return encoded.decode("unicode_escape")


def _unescape_key(raw_key: str) -> str:
    """Resolve backslash escapes in a quoted key without touching other text.

    Only the escape sequences themselves are passed through the
    ``unicode_escape`` codec.  Decoding the whole UTF-8 encoded key with that
    codec would reinterpret every non-ASCII character as Latin-1 bytes and
    corrupt it (``'café'`` would become ``'cafÃ©'``).
    """
    if "\\" not in raw_key:
        return raw_key
    return _KEY_ESCAPE_PATTERN.sub(_decode_key_escape, raw_key)


def _evaluate_accessors(base_value: Any, accessor_string: str) -> Any:
    """Apply a chain of attribute and item accessors to ``base_value``.

    Args:
        base_value: The initial value (e.g. taken from the inputs dictionary).
        accessor_string: Chained accessors such as ``.attribute``, ``[0]``,
            ``['key']`` or ``.attr[0]['key']``.  Whitespace around dots and
            brackets is ignored.

    Returns:
        The value obtained after applying every accessor in order.

    Raises:
        KeyError: If a dictionary key is missing or a sequence index is out
            of range.
        AttributeError: If an attribute is not found.
        TypeError: If the current object does not support the requested
            item or attribute access.
        ValueError: If ``accessor_string`` cannot be parsed.
        RuntimeError: For any other error raised while accessing a value.
    """
    current_value = base_value
    remaining = accessor_string.strip()

    while remaining:
        match = _ACCESSOR_PATTERN.match(remaining)
        if not match:
            raise ValueError(
                f"Invalid accessor format near '{remaining}' "
                f"in full accessor string '{accessor_string}'"
            )

        full_match_str = match.group(0)
        is_dot_access = match.group(1) is not None
        attr_name = match.group(2)
        index_str = match.group(5)
        single_quoted_key = match.group(6)
        double_quoted_key = match.group(7)
        # Reset on every iteration so error messages never report a key that
        # belongs to an earlier accessor in the chain.
        key: Union[str, int, None] = None

        try:
            if is_dot_access:
                # NOTE(review): the attribute name comes from the template
                # text, so a template author can reach any attribute of the
                # supplied objects (dunder attributes included).  Only
                # interpolate trusted templates, or restrict names here.
                if isinstance(current_value, dict):
                    try:
                        # Dot notation on a dict means key access first ...
                        current_value = current_value[attr_name]
                    except KeyError:
                        # ... falling back to attributes of the dict itself.
                        current_value = getattr(current_value, attr_name)
                else:
                    current_value = getattr(current_value, attr_name)
            elif index_str is not None:
                key = int(index_str)
                current_value = current_value[key]
            else:
                raw_key = (
                    single_quoted_key
                    if single_quoted_key is not None
                    else double_quoted_key
                )
                key = _unescape_key(raw_key)
                current_value = current_value[key]

        except AttributeError:
            failed_name = attr_name if is_dot_access else "attribute"
            raise AttributeError(
                f"Attribute '{failed_name}' not found on object "
                f"of type {type(current_value).__name__} while evaluating "
                f"'{accessor_string}'"
            ) from None  # Break the chain for a more direct error
        except KeyError:
            if index_str is not None:
                key_repr = index_str
                error_type = "Index"
            elif is_dot_access:
                key_repr = repr(attr_name)
                error_type = "Key"
            else:
                key_repr = repr(key)
                error_type = "Key"
            raise KeyError(
                f"{error_type} {key_repr} not found or invalid for object "
                f"of type {type(current_value).__name__} while evaluating "
                f"'{accessor_string}'"
            ) from None
        except IndexError:
            idx_val = index_str if index_str else "unknown"
            try:
                length: Union[str, int] = len(current_value)
            except TypeError:
                # Objects may raise IndexError without defining __len__.
                length = "unknown"
            # KeyError (not IndexError) is raised on purpose so callers see a
            # single exception type for every failed lookup.
            raise KeyError(
                f"Index {idx_val} out of bounds for sequence of length {length} "
                f"while evaluating '{accessor_string}'"
            ) from None
        except TypeError as e:
            # E.g. indexing a non-subscriptable object.
            raise TypeError(
                f"Object of type {type(current_value).__name__} does not support "
                f"{'attribute' if is_dot_access else 'item'} access "
                f"needed for '{full_match_str}' in '{accessor_string}': {e}"
            ) from None
        except Exception as e:  # Anything else raised by the access itself
            raise RuntimeError(
                f"Unexpected error accessing '{full_match_str}' in "
                f"'{accessor_string}': {type(e).__name__}: {e}"
            ) from e

        # Move on to the next accessor in the chain.
        remaining = remaining[match.end():].lstrip()

    return current_value


def interpolate_only(
    input_string: Optional[str],
    inputs: Dict[str, Any],  # Any value type; checked when it is interpolated
) -> str:
    """Interpolate ``{placeholder}`` variables while leaving JSON-like text alone.

    Placeholders follow the format ``{variable_name.accessor[index]['key']...}``.
    ``variable_name`` must start with a letter or underscore, followed by
    letters, digits or underscores, and must come directly after the opening
    brace.  Accessors (``.`` and ``[]``) navigate nested objects, lists and
    dicts.  Braces that do not match this shape are copied through unchanged.

    Args:
        input_string: The template text.  ``None`` or an empty string yields
            ``""``.
        inputs: Mapping from base variable names to their values.

    Returns:
        The template with every placeholder replaced by ``str(value)``.

    Raises:
        KeyError: If a base variable is missing from ``inputs`` (all missing
            names are reported together after the whole template has been
            scanned), or if a key/index lookup in an accessor chain fails.
        AttributeError: If an attribute accessed via dot notation is missing.
        TypeError: If an object does not support the requested access.
        ValueError: If ``inputs`` is empty while the template contains a
            placeholder, if an accessor chain is malformed, or if a
            placeholder without accessors resolves to a value that is not a
            str, int, float, bool, None, list or dict.
        RuntimeError: For other errors raised while evaluating a placeholder.
    """
    if not input_string:
        return ""

    # Cheap rejection: a placeholder needs both an opening and a closing brace.
    if "{" not in input_string or "}" not in input_string:
        return input_string

    missing_base_vars = set()  # reported together in one final KeyError
    result_parts = []  # literal text and substituted values, in order
    last_end = 0

    for match in _PLACEHOLDER_PATTERN.finditer(input_string):
        start, end = match.span()
        # Literal text between the previous placeholder and this one.
        result_parts.append(input_string[last_end:start])
        last_end = end

        full_placeholder = match.group(0)
        base_var = match.group(1)
        accessors = match.group(2).strip()

        # Empty inputs are only an error once a placeholder actually shows up.
        if not inputs:
            raise ValueError(
                "Inputs dictionary cannot be empty when template variables "
                f"like '{full_placeholder}' are present."
            )

        if base_var not in inputs:
            # Keep scanning so the final error can list every missing name.
            missing_base_vars.add(base_var)
            result_parts.append(full_placeholder)
            continue

        try:
            value = inputs[base_var]
            if accessors:
                value = _evaluate_accessors(value, accessors)
            elif value is not None and not isinstance(
                value, (str, int, float, bool, list, dict)
            ):
                # Arbitrary objects must be narrowed with an accessor first.
                raise ValueError(
                    f"Variable '{base_var}' resolved to an unsupported type "
                    f"({type(value).__name__}) for direct interpolation without accessors. "
                    f"Use accessors (e.g., {base_var}.attribute) or ensure "
                    f"the value is a primitive type, list, or dict."
                )

            result_parts.append(str(value))

        except (
            KeyError,
            IndexError,
            AttributeError,
            TypeError,
            ValueError,
            RuntimeError,
        ) as e:
            # Keep the exception type but prepend which placeholder failed.
            error_args = list(e.args)
            if error_args:
                error_args[0] = (
                    f"Error evaluating placeholder {full_placeholder}: {error_args[0]}"
                )
            else:
                error_args.append(
                    f"Error evaluating placeholder {full_placeholder}: {type(e).__name__}"
                )
            e.args = tuple(error_args)
            raise
        except Exception as e:  # Any other unexpected error
            raise RuntimeError(
                f"Unexpected error evaluating placeholder {full_placeholder}: "
                f"{type(e).__name__}: {e}"
            ) from e

    # Text after the last placeholder (the whole input when nothing matched).
    result_parts.append(input_string[last_end:])

    if missing_base_vars:
        raise KeyError(
            f"Template variable(s) {{{', '.join(sorted(missing_base_vars))}}} "
            f"not found in inputs."
        )

    return "".join(result_parts)