Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the ParsedTypeDocstring #874

Draft
wants to merge 24 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ec6c907
Make the linker always output link tags only. No <code> tags. The <co…
tristanlatr Feb 2, 2025
f06cd12
Fix #723 and #581
tristanlatr Feb 2, 2025
788957d
Fix the linenumber issue in the new references
tristanlatr Feb 2, 2025
aefe3c7
Help mypy
tristanlatr Feb 2, 2025
c6ffcef
trying this...
tristanlatr Feb 2, 2025
a8d9af8
Revert "trying this..."
tristanlatr Feb 2, 2025
30bdf2b
Turns out this refactor fixes an obscure bug that would trigger unex…
tristanlatr Feb 2, 2025
ec37bff
Try to fix mypy
tristanlatr Feb 2, 2025
36f6eba
Fix tests of docs
tristanlatr Feb 2, 2025
bd9d457
Showcase the literal choices of google/numpy in the demo
tristanlatr Feb 6, 2025
ed1e6d9
Re-enable spelling extension
tristanlatr Feb 13, 2025
5f5542a
Fix the numpy-style type in the demo
tristanlatr Feb 14, 2025
042f9dc
Actually fix a couple of issues:
tristanlatr Feb 14, 2025
c68853b
Merge branch '873-implement-parsedtypedocstring.to_node' of github.co…
tristanlatr Feb 14, 2025
6703a1f
This changes simplify the parsed type docstring and makes the logic l…
tristanlatr Feb 14, 2025
a554adc
Properly add regression test for the duplicated type attribute bug
tristanlatr Feb 14, 2025
742850a
fix pyflakes
tristanlatr Feb 14, 2025
2681285
get_lineno refactor
tristanlatr Feb 17, 2025
dace95f
Revert "get_lineno refactor"
tristanlatr Feb 17, 2025
40a8623
Simplification now that the nested warnings are not useful
tristanlatr Feb 17, 2025
32cbcfa
add a comment to get_lineno
tristanlatr Feb 19, 2025
8359c43
Merge branch 'master' into 873-implement-parsedtypedocstring.to_node
tristanlatr Feb 19, 2025
cba36ab
Merge branch 'master' into 873-implement-parsedtypedocstring.to_node
tristanlatr Feb 19, 2025
7eb22b7
Remove unused import
tristanlatr Feb 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/google_demo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def function_with_types_in_docstring(param1, param2):
Args:
param1 (int): The first parameter.
param2 (str): The second parameter.
param2 (str : {"html", "json", "xml"}, optional): The second parameter.
Returns:
bool: The return value. True for success, False otherwise.
Expand Down
2 changes: 1 addition & 1 deletion docs/numpy_demo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def function_with_types_in_docstring(param1, param2):
----------
param1 : int
The first parameter.
param2 : str
param2 : str : {"html", "json", "xml"}, optional
The second parameter.

Returns
Expand Down
1 change: 0 additions & 1 deletion docs/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def test_search(query:str, expected:List[str], order_is_important:bool=True) ->
to_stan_results = [
'pydoctor.epydoc.markup.ParsedDocstring.to_stan',
'pydoctor.epydoc.markup.plaintext.ParsedPlaintextDocstring.to_stan',
'pydoctor.epydoc.markup._types.ParsedTypeDocstring.to_stan',
'pydoctor.epydoc.markup._pyval_repr.ColorizedPyvalRepr.to_stan',
'pydoctor.epydoc2stan.ParsedStanOnly.to_stan',
]
Expand Down
11 changes: 7 additions & 4 deletions pydoctor/epydoc/docutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
"""
from __future__ import annotations

from typing import Iterable, Iterator, Optional, TypeVar, cast
from typing import Iterable, Iterator, Optional, TypeVar, cast, TYPE_CHECKING

if TYPE_CHECKING:
from typing import Literal

import optparse

Expand All @@ -14,11 +17,11 @@

_DEFAULT_DOCUTILS_SETTINGS: Optional[optparse.Values] = None

def new_document(source_path: str, settings: Optional[optparse.Values] = None) -> nodes.document:
def new_document(source: Literal['docstring', 'code'], settings: Optional[optparse.Values] = None) -> nodes.document:
"""
Create a new L{nodes.document} using the provided settings or cached default settings.

@returns: L{nodes.document}
@returns: L{nodes.document} with a C{source} attribute that matches the provided source.
"""
global _DEFAULT_DOCUTILS_SETTINGS
# If we have docutils >= 0.19 we use get_default_settings to calculate and cache
Expand All @@ -29,7 +32,7 @@ def new_document(source_path: str, settings: Optional[optparse.Values] = None) -

settings = _DEFAULT_DOCUTILS_SETTINGS

return utils.new_document(source_path, settings)
return utils.new_document(source, settings)

def _set_nodes_parent(nodes: Iterable[nodes.Node], parent: nodes.Element) -> Iterator[nodes.Node]:
"""
Expand Down
11 changes: 6 additions & 5 deletions pydoctor/epydoc/markup/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def get_supported_docformats() -> Iterator[str]:

def get_parser_by_name(docformat: str, objclass: ObjClass | None = None) -> ParserFunction:
"""
Get the C{parse_docstring(str, List[ParseError], bool) -> ParsedDocstring} function based on a parser name.
Get the C{parse_docstring(str, List[ParseError]) -> ParsedDocstring} function based on a parser name.

@raises ImportError: If the parser could not be imported, probably meaning that you are missing a dependency
or it could be that the docformat name does not match any known L{pydoctor.epydoc.markup} submodules.
Expand All @@ -112,7 +112,7 @@ def _processtypes(doc: 'ParsedDocstring', errs: List['ParseError']) -> None:
for field in doc.fields:
if field.tag() in ParsedTypeDocstring.FIELDS:
body = ParsedTypeDocstring(field.body().to_node(), lineno=field.lineno)
append_warnings(body.warnings, errs, lineno=field.lineno+1)
append_warnings(body.warnings, errs, lineno=field.lineno)
field.replace_body(body)

def parse_and_processtypes(doc:str, errs:List['ParseError']) -> 'ParsedDocstring':
Expand Down Expand Up @@ -150,7 +150,8 @@ def __init__(self, fields: Sequence['Field']):
self._stan: Optional[Tag] = None
self._summary: Optional['ParsedDocstring'] = None

@abc.abstractproperty
@property
@abc.abstractmethod
def has_body(self) -> bool:
"""
Does this docstring have a non-empty body?
Expand All @@ -168,7 +169,7 @@ def get_toc(self, depth: int) -> Optional['ParsedDocstring']:
except NotImplementedError:
return None
contents = build_table_of_content(document, depth=depth)
docstring_toc = new_document('toc')
docstring_toc = new_document('docstring')
if contents:
docstring_toc.extend(contents)
from pydoctor.epydoc.markup.restructuredtext import ParsedRstDocstring
Expand Down Expand Up @@ -439,7 +440,7 @@ def visit_paragraph(self, node: nodes.paragraph) -> None:
self.other_docs = True
raise nodes.StopTraversal()

summary_doc = new_document('summary')
summary_doc = new_document('docstring')
summary_pieces: list[nodes.Node] = []

# Extract the first sentences from the first paragraph until maximum number
Expand Down
2 changes: 1 addition & 1 deletion pydoctor/epydoc/markup/_pyval_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def colorize(self, pyval: Any) -> ColorizedPyvalRepr:
is_complete = True

# Put it all together.
document = new_document('pyval_repr')
document = new_document('code')
# This ensures the .parent and .document attributes of the child nodes are set correctly.
set_node_attributes(document, children=[set_node_attributes(node, document=document) for node in state.result])
return ColorizedPyvalRepr(document, is_complete, state.warnings)
Expand Down
169 changes: 60 additions & 109 deletions pydoctor/epydoc/markup/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
"""
from __future__ import annotations

from typing import Any, Callable, Dict, List, Tuple, Union, cast
from typing import Callable, Dict, List, Union, cast

from pydoctor.epydoc.markup import DocstringLinker, ParseError, ParsedDocstring, get_parser_by_name
from pydoctor.node2stan import node2stan
from pydoctor.epydoc.markup import ParseError, ParsedDocstring
from pydoctor.epydoc.markup._pyval_repr import PyvalColorizer
from pydoctor.napoleon.docstring import TokenType, TypeDocstring
from pydoctor.epydoc.docutils import new_document, set_node_attributes

from docutils import nodes
from twisted.web.template import Tag, tags

# TODO: This class should use composition instead of multiple inheritence...
class ParsedTypeDocstring(TypeDocstring, ParsedDocstring):
"""
Add L{ParsedDocstring} interface on top of L{TypeDocstring} and
Expand All @@ -25,38 +26,25 @@
# yes this overrides the superclass type!
_tokens: list[tuple[str | nodes.Node, TokenType]] # type: ignore

def __init__(self, annotation: Union[nodes.document, str],
def __init__(self, annotation: nodes.document,
warns_on_unknown_tokens: bool = False, lineno: int = 0) -> None:
ParsedDocstring.__init__(self, ())
if isinstance(annotation, nodes.document):
TypeDocstring.__init__(self, '', warns_on_unknown_tokens)
TypeDocstring.__init__(self, '', warns_on_unknown_tokens)

_tokens = self._tokenize_node_type_spec(annotation)
self._tokens = cast('list[tuple[str | nodes.Node, TokenType]]',
self._build_tokens(_tokens))
self._trigger_warnings()
else:
TypeDocstring.__init__(self, annotation, warns_on_unknown_tokens)

_tokens = self._tokenize_node_type_spec(annotation)
self._tokens = cast('list[tuple[str | nodes.Node, TokenType]]',
self._build_tokens(_tokens))
self._trigger_warnings()

# We need to store the line number because we need to pass it to DocstringLinker.link_xref
self._lineno = lineno
self._document = self._parse_tokens()

@property
def has_body(self) -> bool:
return len(self._tokens)>0

def to_node(self) -> nodes.document:
"""
Not implemented.
"""
raise NotImplementedError()

def to_stan(self, docstring_linker: DocstringLinker) -> Tag:
"""
Present the type as a stan tree.
"""
return self._convert_type_spec_to_stan(docstring_linker)
return self._document

def _tokenize_node_type_spec(self, spec: nodes.document) -> List[Union[str, nodes.Node]]:
def _warn_not_supported(n:nodes.Node) -> None:
Expand Down Expand Up @@ -84,97 +72,60 @@

return tokens

def _convert_obj_tokens_to_stan(self, tokens: List[Tuple[Any, TokenType]],
docstring_linker: DocstringLinker) -> list[tuple[Any, TokenType]]:
"""
Convert L{TokenType.OBJ} and PEP 484 like L{TokenType.DELIMITER} type to stan, merge them together. Leave the rest untouched.

Exemple:

>>> tokens = [("list", TokenType.OBJ), ("(", TokenType.DELIMITER), ("int", TokenType.OBJ), (")", TokenType.DELIMITER)]
>>> ann._convert_obj_tokens_to_stan(tokens, NotFoundLinker())
... [(Tag('code', children=['list', '(', 'int', ')']), TokenType.OBJ)]

@param tokens: List of tuples: C{(token, type)}
"""

combined_tokens: list[tuple[Any, TokenType]] = []

open_parenthesis = 0
open_square_braces = 0

for _token, _type in tokens:
# The actual type of_token is str | Tag | Node.

if (_type is TokenType.DELIMITER and _token in ('[', '(', ')', ']')) \
or _type is TokenType.OBJ:
if _token == "[": open_square_braces += 1
elif _token == "(": open_parenthesis += 1

if _type is TokenType.OBJ:
_token = docstring_linker.link_xref(
_token, _token, self._lineno)

if open_square_braces + open_parenthesis > 0:
try: last_processed_token = combined_tokens[-1]
except IndexError:
combined_tokens.append((_token, _type))
else:
if last_processed_token[1] is TokenType.OBJ \
and isinstance(last_processed_token[0], Tag):
# Merge with last Tag
if _type is TokenType.OBJ:
assert isinstance(_token, Tag)
last_processed_token[0](*_token.children)
else:
last_processed_token[0](_token)
else:
combined_tokens.append((_token, _type))
else:
combined_tokens.append((_token, _type))

if _token == "]": open_square_braces -= 1
elif _token == ")": open_parenthesis -= 1

else:
# the token will be processed in _convert_type_spec_to_stan() method.
combined_tokens.append((_token, _type))

return combined_tokens
Comment on lines -100 to -143
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the unfortunate code

_converters: Dict[TokenType, Callable[[str, list[ParseError], int], nodes.Node]] = {
# we're re-using the variable string css
# class for the whole literal token, it's the
# best approximation we have for now.
TokenType.LITERAL: lambda _token, _, __: \
nodes.inline(_token, _token, classes=[PyvalColorizer.STRING_TAG]),

TokenType.CONTROL: lambda _token, _, __: \
nodes.emphasis(_token, _token),

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change


TokenType.OBJ: lambda _token, _, lineno: \
set_node_attributes(nodes.title_reference(_token, _token),
lineno=lineno),
}

def _convert_type_spec_to_stan(self, docstring_linker: DocstringLinker) -> Tag:
def _parse_tokens(self) -> nodes.document:
"""
Convert type to L{Tag} object.
Convert type to docutils document object.
"""

tokens = self._convert_obj_tokens_to_stan(self._tokens, docstring_linker)

document = new_document('code')
warnings: List[ParseError] = []
converters = self._converters
lineno = self._lineno

converters: Dict[TokenType, Callable[[Union[str, Tag]], Union[str, Tag]]] = {
TokenType.LITERAL: lambda _token: tags.span(_token, class_="literal"),
TokenType.CONTROL: lambda _token: tags.em(_token),
# We don't use safe_to_stan() here, if these converter functions raise an exception,
# the whole type docstring will be rendered as plaintext.
# it does not crash on invalid xml entities
TokenType.REFERENCE: lambda _token: get_parser_by_name('restructuredtext')(_token, warnings).to_stan(docstring_linker) if isinstance(_token, str) else _token,
TokenType.UNKNOWN: lambda _token: get_parser_by_name('restructuredtext')(_token, warnings).to_stan(docstring_linker) if isinstance(_token, str) else _token,
TokenType.OBJ: lambda _token: _token, # These convertions (OBJ and DELIMITER) are done in _convert_obj_tokens_to_stan().
TokenType.DELIMITER: lambda _token: _token,
TokenType.ANY: lambda _token: _token,
}
elements: list[nodes.Node] = []
default = lambda _token, _, __: nodes.Text(_token)

for token, type_ in self._tokens:
assert token is not None
converted_token: nodes.Node

if type_ is TokenType.ANY:
assert isinstance(token, nodes.Node)
converted_token = token
else:
assert isinstance(token, str)
converted_token = converters.get(type_, default)(token, warnings, lineno)

if isinstance(converted_token, nodes.document):
elements.extend((set_node_attributes(t, document=document)

Check warning on line 116 in pydoctor/epydoc/markup/_types.py

View check run for this annotation

Codecov / codecov/patch

pydoctor/epydoc/markup/_types.py#L116

Added line #L116 was not covered by tests
for t in converted_token.children))
else:
elements.append(set_node_attributes(converted_token,
document=document))
# warnings should be appended once we have called all converters.
for w in warnings:
self.warnings.append(w.descr())

converted = Tag('')

for token, type_ in tokens:
assert token is not None
if isinstance(token, nodes.Node):
token = node2stan(token, docstring_linker)
assert isinstance(token, (str, Tag))
converted_token = converters[type_](token)
converted(converted_token)

return converted
return set_node_attributes(document, children=[
set_node_attributes(nodes.inline('', '', classes=['literal']),
children=elements,
# the +1 here is coping with the fact that
# ParseErrors are 1-based but the docutils
# line we're getting from get_lineno() is zero-based.
lineno=lineno+1)])
2 changes: 1 addition & 1 deletion pydoctor/epydoc/markup/epytext.py
Original file line number Diff line number Diff line change
Expand Up @@ -1379,7 +1379,7 @@ def to_node(self) -> nodes.document:
if self._document is not None:
return self._document

self._document = new_document('epytext')
self._document = new_document('docstring')

if self._tree is not None:
node, = self._to_node(self._tree)
Expand Down
2 changes: 1 addition & 1 deletion pydoctor/epydoc/markup/plaintext.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def to_node(self) -> nodes.document:
return self._document
else:
# create document
_document = new_document('plaintext')
_document = new_document('docstring')

# split text into paragraphs
paragraphs = [set_node_attributes(nodes.paragraph('',''), children=[
Expand Down
2 changes: 1 addition & 1 deletion pydoctor/epydoc/markup/restructuredtext.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def get_transforms(self) -> List[Transform]:
if t != frontmatter.DocInfo]

def new_document(self) -> nodes.document:
document = new_document(self.source.source_path, self.settings)
document = new_document('docstring', self.settings)
# Capture all warning messages.
document.reporter.attach_observer(self.report)
# Return the new document.
Expand Down
2 changes: 1 addition & 1 deletion pydoctor/epydoc2stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -1155,7 +1155,7 @@ def get_constructors_extra(cls:model.Class) -> ParsedDocstring | None:
if not constructors:
return None

document = new_document('constructors')
document = new_document('docstring')

elements: list[nodes.Node] = []
plural = 's' if len(constructors)>1 else ''
Expand Down
Loading
Loading