Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions src/sktime_mcp/registry/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,37 @@

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Maximum characters of docstring to expose to LLMs in to_dict() output.
# Earlier versions truncated at 500 chars, but sktime's numpydoc-style
# docstrings put the "Parameters" section (the most useful information
# for an agent reasoning about hyperparameters) past the first 500
# chars in many estimators (issue #335). Bumping the cap to 4000 keeps
# the response compact enough for an MCP tool result while preserving
# the Parameters section for the vast majority of sktime estimators.
# NOTE: the cap applies to the retained docstring content only; the
# "..." marker that _truncate_docstring appends on truncation comes on
# top of it, so a truncated result may be up to 3 characters longer.
_DOCSTRING_MAX_CHARS = 4000


def _truncate_docstring(docstring: str | None) -> str | None:
"""Return *docstring* truncated to a length that preserves the
Parameters section when possible.

The function:
- returns ``None`` for a missing docstring;
- returns the full docstring if it is already short enough;
- otherwise returns the first ``_DOCSTRING_MAX_CHARS`` characters,
with a trailing ellipsis when content was dropped.

Per #335 the cap is large enough to capture the numpydoc
``Parameters`` block of every estimator we have looked at without
embedding the full docstring (which can run several pages on
composite forecasters).
"""
if docstring is None:
return None
if len(docstring) <= _DOCSTRING_MAX_CHARS:
return docstring
return docstring[:_DOCSTRING_MAX_CHARS].rstrip() + "..."


@dataclass
class EstimatorNode:
Expand Down Expand Up @@ -47,9 +78,7 @@ def to_dict(self) -> dict[str, Any]:
"module": self.module,
"tags": self.tags,
"hyperparameters": self.hyperparameters,
"docstring": (
self.docstring[:500] if self.docstring else None
), # L-1: Truncate docstring to 500 characters, we can also try summarization
"docstring": _truncate_docstring(self.docstring),
}

def to_summary(self) -> dict[str, Any]:
Expand Down
45 changes: 45 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,48 @@ def fake_save_model(**kwargs):

if __name__ == "__main__":
pytest.main([__file__, "-v"])


class TestDocstringTruncation:
    """Regression tests for issue #335 (docstring truncation cap).

    The cap was raised so that the numpydoc ``Parameters`` section,
    which appears past the first 500 chars in many sktime estimators,
    survives truncation. These tests exercise the ``_truncate_docstring``
    helper directly and pin its exact output, not just a length bound.
    """

    def test_short_docstring_unchanged(self):
        """A docstring well under the cap is returned as-is."""
        from sktime_mcp.registry.interface import _truncate_docstring

        assert _truncate_docstring("hi") == "hi"

    def test_none_passes_through(self):
        """A missing docstring stays ``None``."""
        from sktime_mcp.registry.interface import _truncate_docstring

        assert _truncate_docstring(None) is None

    def test_docstring_at_cap_unchanged(self):
        """Boundary: exactly cap-length content must not gain an ellipsis."""
        from sktime_mcp.registry.interface import (
            _DOCSTRING_MAX_CHARS,
            _truncate_docstring,
        )

        at_cap = "x" * _DOCSTRING_MAX_CHARS
        assert _truncate_docstring(at_cap) == at_cap

    def test_long_docstring_truncated_with_ellipsis(self):
        """Over-long content is cut at the cap and marked with ``...``."""
        from sktime_mcp.registry.interface import (
            _DOCSTRING_MAX_CHARS,
            _truncate_docstring,
        )

        long_doc = "x" * (_DOCSTRING_MAX_CHARS + 50)
        out = _truncate_docstring(long_doc)
        # Pin the exact result: a bare length bound would also pass if the
        # helper dropped far more content than the cap requires.
        assert out == "x" * _DOCSTRING_MAX_CHARS + "..."

    def test_trailing_whitespace_stripped_before_ellipsis(self):
        """Whitespace left at the cut point is removed before ``...``."""
        from sktime_mcp.registry.interface import (
            _DOCSTRING_MAX_CHARS,
            _truncate_docstring,
        )

        kept = "x" * (_DOCSTRING_MAX_CHARS - 5)
        # The cut lands inside the run of spaces, so only ``kept`` survives.
        doc = kept + " " * 10 + "tail"
        assert _truncate_docstring(doc) == kept + "..."

    def test_cap_preserves_parameters_section_past_500_chars(self):
        """The whole point of #335: Parameters section appears past 500
        chars in many sktime docstrings, and the new cap must keep it."""
        from sktime_mcp.registry.interface import _truncate_docstring

        intro = "x" * 600
        doc = (
            intro
            + "\n\nParameters\n----------\nn_neighbors : int, default=5\n"
        )
        out = _truncate_docstring(doc)
        assert out is not None
        assert "Parameters" in out
        assert "n_neighbors" in out