diff --git a/google/genai/_transformers.py b/google/genai/_transformers.py
index a156080f..285d996f 100644
--- a/google/genai/_transformers.py
+++ b/google/genai/_transformers.py
@@ -582,6 +582,54 @@ def handle_null_fields(schema: dict[str, Any]):
         del schema['anyOf']
 
 
+# Maximum length, in characters, of the stringified schema. This is a
+# conservative estimate based on the "too many states for serving" error.
+_MAX_SCHEMA_CHARS = 10000
+
+
+def _is_schema_too_large(schema: dict[str, Any]) -> bool:
+  """Checks if the schema is too large.
+
+  The size is measured as the length of `str(schema)`.
+
+  Args:
+    schema: The schema to check.
+
+  Returns:
+    True if the schema is too large, False otherwise.
+  """
+  return len(str(schema)) > _MAX_SCHEMA_CHARS
+
+
+def _strip_titles(schema: dict[str, Any]) -> None:
+  """Recursively strips titles from a schema and its sub-schemas, in place.
+
+  Sub-schemas are found by key (`properties`, `items`, `anyOf`) rather than
+  by the value of `type`, so a node that has both a `type` and an `anyOf`,
+  or whose `type` is missing or not a string, is still fully processed.
+
+  Args:
+    schema: The schema to strip titles from.
+  """
+  schema.pop('title', None)
+
+  properties = schema.get('properties')
+  if isinstance(properties, dict):
+    for sub_schema in properties.values():
+      if isinstance(sub_schema, dict):
+        _strip_titles(sub_schema)
+
+  items = schema.get('items')
+  if isinstance(items, dict):
+    _strip_titles(items)
+
+  any_of = schema.get('anyOf')
+  if isinstance(any_of, list):
+    for sub_schema in any_of:
+      if isinstance(sub_schema, dict):
+        _strip_titles(sub_schema)
+
+
 def process_schema(
     schema: dict[str, Any],
     client: _api_client.BaseApiClient,
@@ -591,63 +639,11 @@ def process_schema(
 ):
   """Updates the schema and each sub-schema inplace to be API-compatible.
 
-  - Inlines the $defs.
-
-  Example of a schema before and after (with mldev):
-    Before:
-
-    `schema`
-
-    {
-        'items': {
-            '$ref': '#/$defs/CountryInfo'
-        },
-        'title': 'Placeholder',
-        'type': 'array'
-    }
-
-
-    `defs`
-
-    {
-      'CountryInfo': {
-        'properties': {
-          'continent': {
-              'title': 'Continent',
-              'type': 'string'
-          },
-          'gdp': {
-              'title': 'Gdp',
-              'type': 'integer'}
-          },
-        }
-        'required':['continent', 'gdp'],
-        'title': 'CountryInfo',
-        'type': 'object'
-      }
-    }
-
-    After:
-
-    `schema`
-     {
-        'items': {
-            'properties': {
-                'continent': {
-                    'title': 'Continent',
-                    'type': 'string'
-                },
-                'gdp': {
-                    'title': 'Gdp',
-                    'type': 'integer'
-                },
-            }
-            'required':['continent', 'gdp'],
-            'title': 'CountryInfo',
-            'type': 'object'
-        },
-        'type': 'array'
-    }
+  Args:
+    schema: The schema to process; it is updated in place.
+    client: The API client.
+    defs: The `$defs` definitions that are inlined into the schema.
+    order_properties: Whether to order the properties.
   """
   if not client.vertexai:
     if schema.get('default') is not None:
@@ -727,6 +723,10 @@ def _recurse(sub_schema: dict[str, Any]) -> dict[str, Any]:
   if (items := schema.get('items')) is not None:
     schema['items'] = _recurse(items)
 
+  # Check if the schema is too large and, if so, strip the titles from all properties
+  if _is_schema_too_large(schema):
+    _strip_titles(schema)
+
 
 def _process_enum(
     enum: EnumMeta, client: _api_client.BaseApiClient
diff --git a/google/genai/tests/transformers/test_schema.py b/google/genai/tests/transformers/test_schema.py
index d9f96350..6e4f10a8 100644
--- a/google/genai/tests/transformers/test_schema.py
+++ b/google/genai/tests/transformers/test_schema.py
@@ -560,3 +560,207 @@ def test_t_schema_does_not_set_property_ordering_for_schema_type(client):
   with pytest.raises(ValueError) as e:
     _transformers.t_schema(client, schema)
   assert 'Default value is not supported' in str(e)
+
+
+def test_is_schema_too_large():
+  """Tests the _is_schema_too_large function."""
+  schema = {
+      'type': 'object',
+      'properties': {
+          'foo': {
+              'type': 'string',
+              'title': 'Foo',
+          },
+          'bar': {
+              'type': 'integer',
+              'title': 'Bar',
+          },
+      },
+  }
+  assert not _transformers._is_schema_too_large(schema)
+
+  # Create a schema that is too large.
+  large_schema = {
+      'type': 'object',
+      'properties': {
+          'foo': {
+              'type': 'string',
+              'title': 'Foo' * 5000,  # Make the title much longer
+          },
+          'bar': {
+              'type': 'integer',
+              'title': 'Bar' * 5000,  # Make the title much longer
+          },
+      },
+  }
+  assert _transformers._is_schema_too_large(large_schema)
+
+
+def test_process_schema_strips_titles_if_too_large():
+  """Tests that the process_schema function strips titles from properties if the schema is too large."""
+  schema = {
+      'type': 'object',
+      'properties': {
+          'foo': {
+              'type': 'string',
+              'title': 'Foo',
+          },
+          'bar': {
+              'type': 'integer',
+              'title': 'Bar',
+          },
+      },
+  }
+  client = google_genai_client_module.Client(api_key='test-api-key')
+  _transformers.process_schema(schema, client)
+  assert 'title' in schema['properties']['foo']
+  assert 'title' in schema['properties']['bar']
+
+  # Create a schema that is too large.
+  large_schema = {
+      'type': 'object',
+      'properties': {
+          'foo': {
+              'type': 'string',
+              'title': 'Foo' * 5000,
+          },
+          'bar': {
+              'type': 'integer',
+              'title': 'Bar' * 5000,
+          },
+      },
+  }
+  _transformers.process_schema(large_schema, client)
+  assert 'title' not in large_schema['properties']['foo']
+  assert 'title' not in large_schema['properties']['bar']
+
+
+def test_strip_titles():
+  """Tests that _strip_titles correctly removes titles from a schema."""
+  schema = {
+      'type': 'OBJECT',
+      'title': 'Root',
+      'properties': {
+          'foo': {
+              'type': 'STRING',
+              'title': 'Foo',
+          },
+          'bar': {
+              'type': 'OBJECT',
+              'title': 'Bar',
+              'properties': {
+                  'baz': {
+                      'type': 'INTEGER',
+                      'title': 'Baz',
+                  },
+              },
+          },
+          'qux': {
+              'type': 'ARRAY',
+              'title': 'Qux',
+              'items': {
+                  'type': 'STRING',
+                  'title': 'QuxItem',
+              },
+          },
+          'quux': {
+              'title': 'Quux',
+              'anyOf': [
+                  {
+                      'type': 'STRING',
+                      'title': 'QuuxString',
+                  },
+                  {
+                      'type': 'INTEGER',
+                      'title': 'QuuxInt',
+                  },
+              ],
+          },
+      },
+  }
+
+  _transformers._strip_titles(schema)
+
+  # Check that all titles have been removed
+  assert 'title' not in schema
+  assert 'title' not in schema['properties']['foo']
+  assert 'title' not in schema['properties']['bar']
+  assert 'title' not in schema['properties']['bar']['properties']['baz']
+  assert 'title' not in schema['properties']['qux']
+  assert 'title' not in schema['properties']['qux']['items']
+  assert 'title' not in schema['properties']['quux']
+  assert 'title' not in schema['properties']['quux']['anyOf'][0]
+  assert 'title' not in schema['properties']['quux']['anyOf'][1]
+
+
+def test_process_schema_strips_titles_when_too_large():
+  """Tests that process_schema strips titles when the schema is too large."""
+  client = google_genai_client_module.Client(api_key='test-api-key')
+
+  # Create a schema that will be too large due to long titles
+  large_schema = {
+      'type': 'OBJECT',
+      'title': 'Root' * 1000,
+      'properties': {
+          'foo': {
+              'type': 'STRING',
+              'title': 'Foo' * 1000,
+          },
+          'bar': {
+              'type': 'INTEGER',
+              'title': 'Bar' * 1000,
+          },
+      },
+  }
+
+  _transformers.process_schema(large_schema, client)
+
+  # Check that all titles have been removed
+  assert 'title' not in large_schema
+  assert 'title' not in large_schema['properties']['foo']
+  assert 'title' not in large_schema['properties']['bar']
+
+
+def test_process_schema_preserves_titles_when_not_too_large():
+  """Tests that process_schema preserves titles when the schema is not too large."""
+  client = google_genai_client_module.Client(api_key='test-api-key')
+
+  schema = {
+      'type': 'OBJECT',
+      'title': 'Root',
+      'properties': {
+          'foo': {
+              'type': 'STRING',
+              'title': 'Foo',
+          },
+          'bar': {
+              'type': 'INTEGER',
+              'title': 'Bar',
+          },
+      },
+  }
+
+  _transformers.process_schema(schema, client)
+
+  # Check that all titles are preserved
+  assert schema['title'] == 'Root'
+  assert schema['properties']['foo']['title'] == 'Foo'
+  assert schema['properties']['bar']['title'] == 'Bar'
+
+
+def test_strip_titles_walks_sub_schemas_regardless_of_type():
+  """Tests that _strip_titles handles `anyOf` next to `type` and non-string types."""
+  schema = {
+      'type': 'OBJECT',
+      'title': 'Root',
+      'anyOf': [{'type': 'STRING', 'title': 'Alt'}],
+      'properties': {
+          'nullable': {'type': ['STRING', 'NULL'], 'title': 'Nullable'},
+      },
+  }
+
+  _transformers._strip_titles(schema)
+
+  assert 'title' not in schema
+  assert 'title' not in schema['anyOf'][0]
+  assert 'title' not in schema['properties']['nullable']