Skip to content

Commit 3e218b9

Browse files
committed
Create docs embeddings
1 parent 5b49e67 commit 3e218b9

File tree

7 files changed

+791
-50
lines changed

7 files changed

+791
-50
lines changed

src/doc_builder/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919

2020
__version__ = "0.6.0.dev0"
2121

22-
from .autodoc import autodoc
22+
from .autodoc import autodoc_svelte
2323
from .build_doc import build_doc
24+
from .build_embeddings import build_embeddings
2425
from .convert_rst_to_mdx import convert_rst_docstring_to_mdx, convert_rst_to_mdx
2526
from .style_doc import style_doc_files
2627
from .utils import update_versions_file

src/doc_builder/autodoc.py

+218-36
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,35 @@ def format_signature(obj):
142142
_re_raisederrors = re.compile(r"<raisederrors>(.*)</raisederrors>", re.DOTALL)
143143

144144

145-
def get_signature_component(name, anchor, signature, object_doc, source_link=None, is_getset_desc=False):
145+
def inside_example_finder_closure(match, tag):
    """
    Wraps a matched parameters/returns section in `<tag>` markers, closing the tag early when the section contains
    an example block.

    Args:
    - **match** (`re.Match`) -- The match whose first group holds the body of the section.
    - **tag** (`str`) -- The tag name used to wrap the section (it is called with `"returns"` and `"parameters"`).

    Returns the section as `<tag>...</tag>`. When an example tag is found inside the section, the closing
    `</tag>` is inserted right before that example tag instead of at the end of the section.
    """
    section = match.group(1)
    example_found = _re_example_tags.search(section)

    # Common case: no example inside the section, so the tag simply wraps the whole body.
    if example_found is None:
        return f"<{tag}>{section}</{tag}>"

    # An example lives inside the section: end the tagged part just before the example starts.
    example_opening = example_found.group(1)
    section = section.replace(example_opening, f"</{tag}>{example_opening}", 1)
    return f"<{tag}>{section}"
156+
157+
158+
def regex_closure(object_doc, regex):
    """
    Extracts the first match of `regex` from `object_doc` and removes every match of `regex` from the text.

    Args:
    - **object_doc** (`str`) -- The docstring to search.
    - **regex** (`re.Pattern`) -- A compiled pattern with at least one capturing group.

    Returns a tuple `(object_doc, match)`: the text with all matches of `regex` removed, and the stripped content
    of the first capturing group of the first match. `match` is `None` when there is no match, when the group did
    not take part in the match, or when the captured text is empty after stripping.
    """
    first_match = regex.search(object_doc)
    cleaned_doc = regex.sub("", object_doc)
    if first_match is None:
        return cleaned_doc, None

    # `group(1)` is `None` when the group did not participate in the match; treat that like an empty capture
    # instead of failing on `None.strip()`.
    captured = (first_match.group(1) or "").strip()
    return cleaned_doc, captured or None
170+
171+
172+
def get_signature_component_svelte(name, anchor, signature, object_doc, source_link=None, is_getset_desc=False):
173+
print("some things svelte")
146174
"""
147175
Returns the svelte `Docstring` component string.
148176
@@ -154,32 +182,6 @@ def get_signature_component(name, anchor, signature, object_doc, source_link=Non
154182
- **source_link** (Union[`str`, `None`], *optional*, defaults to `None`) -- The github source link of the the object.
155183
- **is_getset_desc** (`bool`, *optional*, defaults to `False`) -- Whether the type of obj is `getset_descriptor`.
156184
"""
157-
158-
def inside_example_finder_closure(match, tag):
159-
"""
160-
This closure find whether parameters and/or returns sections has example code block inside it
161-
"""
162-
match_str = match.group(1)
163-
examples_inside = _re_example_tags.search(match_str)
164-
if examples_inside:
165-
example_tag = examples_inside.group(1)
166-
match_str = match_str.replace(example_tag, f"</{tag}>{example_tag}", 1)
167-
return f"<{tag}>{match_str}"
168-
return f"<{tag}>{match_str}</{tag}>"
169-
170-
def regex_closure(object_doc, regex):
171-
"""
172-
This closure matches given regex & removes the matched group from object_doc
173-
"""
174-
re_match = regex.search(object_doc)
175-
object_doc = regex.sub("", object_doc)
176-
match = None
177-
if re_match:
178-
_match = re_match.group(1).strip()
179-
if len(_match):
180-
match = _match
181-
return object_doc, match
182-
183185
object_doc = _re_returns.sub(lambda m: inside_example_finder_closure(m, "returns"), object_doc)
184186
object_doc = _re_parameters.sub(lambda m: inside_example_finder_closure(m, "parameters"), object_doc)
185187

@@ -237,6 +239,82 @@ def regex_closure(object_doc, regex):
237239
return svelte_str + f"\n{object_doc}\n"
238240

239241

242+
def _format_typed_section(label, type_str, description):
    """
    Builds one `Label: type that is description` line, tolerating a missing type or a missing description.
    """
    if type_str is None and description is None:
        return ""
    if type_str is None:
        # Keep the description even when the docstring carried no explicit type for it.
        return f"{label}: {description}\n"
    if description is None:
        return f"{label}: {type_str}\n"
    return f"{label}: {type_str} that is {description}\n"


def get_signature_component_markdown(name, anchor, signature, object_doc, source_link=None, is_getset_desc=False):
    """
    Returns a plain-text (markdown) rendering of an object's documentation, without any svelte markup.

    Args:
    - **name** (`str`) -- The name of the function or class to document (currently not used in the output).
    - **anchor** (`str`) -- The anchor name of the function or class; it is written in the header line.
    - **signature** (`List(Dict(str,str))`) -- The signature of the object, dumped as JSON when not empty.
    - **object_doc** (`str`) -- The docstring of the object.
    - **source_link** (Union[`str`, `None`], *optional*, defaults to `None`) -- The github source link of the
      object (currently not used in the output).
    - **is_getset_desc** (`bool`, *optional*, defaults to `False`) -- Whether the type of obj is
      `getset_descriptor` (currently not used in the output).

    The result starts with `Docstring for: {anchor}`, followed by the remaining docstring text, then optional
    `Arguments`, `Arguments description`, `Returns`, `Yields` and `Raises` parts. Runs of consecutive newlines
    are collapsed into a single newline.
    """
    # Close the returns/parameters tags before any example block nested inside them.
    object_doc = _re_returns.sub(lambda m: inside_example_finder_closure(m, "returns"), object_doc)
    object_doc = _re_parameters.sub(lambda m: inside_example_finder_closure(m, "parameters"), object_doc)

    # Pull each tagged section out of the docstring; what is left in `object_doc` is the free-form description.
    object_doc, parameters = regex_closure(object_doc, _re_parameters)
    object_doc, return_description = regex_closure(object_doc, _re_returns)
    object_doc, returntype = regex_closure(object_doc, _re_returntype)
    object_doc, yield_description = regex_closure(object_doc, _re_yields)
    object_doc, yieldtype = regex_closure(object_doc, _re_yieldtype)
    object_doc, raise_description = regex_closure(object_doc, _re_raises)
    object_doc, raisederrors = regex_closure(object_doc, _re_raisederrors)
    object_doc = remove_example_tags(object_doc)
    object_doc = hashlink_example_codeblock(object_doc, anchor, False)

    # TODO: consider adding the object name/definition, the source link and a note for getset descriptors.
    sections = [f"Docstring for: {anchor}\n", f"{object_doc.strip()}\n"]

    if signature:
        sections.append(f"Arguments: {json.dumps(signature)}\n")

    if parameters is not None:
        groups = _re_parameter_group.split(parameters)
        # The first chunk is the text before any group title.
        sections.append(f"Arguments description:\n{groups[0]}\n")
        # As in the original indexing, only every second following chunk is emitted; presumably the pattern
        # captures the group title, so the chunks alternate title/body and the titles are skipped.
        sections.extend(f"\n{group}\n" for group in groups[2::2])

    sections.append(_format_typed_section("Returns", returntype, return_description))
    sections.append(_format_typed_section("Yields", yieldtype, yield_description))
    sections.append(_format_typed_section("Raises", raisederrors, raise_description))

    # Collapse blank lines so the output is one compact block of text.
    return re.sub(r"\n+", "\n", "".join(sections))
316+
317+
240318
# Re pattern to catch :obj:`xx`, :class:`xx`, :func:`xx` or :meth:`xx`.
241319
_re_rst_special_words = re.compile(r":(?:obj|func|class|meth):`([^`]+)`")
242320
# Re pattern to catch things between double backquotes.
@@ -262,7 +340,7 @@ def is_rst_docstring(docstring):
262340
_re_example_codeblock = re.compile(r"((.*:\s+)?^```((?!```)(.|\n))*```)", re.MULTILINE)
263341

264342

265-
def hashlink_example_codeblock(object_doc, object_anchor):
343+
def hashlink_example_codeblock(object_doc, object_anchor, is_svelte=True):
266344
"""
267345
Returns the svelte `ExampleCodeBlock` component string.
268346
@@ -282,7 +360,11 @@ def add_example_svelte_blocks(match):
282360
example_id += 1
283361
id_str = "" if example_id == 1 else f"-{example_id}"
284362
example_anchor = f"{object_anchor}.example{id_str}"
285-
return f'<ExampleCodeBlock anchor="{example_anchor}">\n\n{match.group(1)}\n\n</ExampleCodeBlock>'
363+
return (
364+
f'<ExampleCodeBlock anchor="{example_anchor}">\n\n{match.group(1)}\n\n</ExampleCodeBlock>'
365+
if is_svelte
366+
else match.group(1)
367+
)
286368

287369
object_doc = _re_example_codeblock.sub(add_example_svelte_blocks, object_doc)
288370
return object_doc
@@ -355,7 +437,9 @@ def get_source_path(object_name, package):
355437
return obj_path
356438

357439

358-
def document_object(object_name, package, page_info, full_name=True, anchor_name=None, version_tag_suffix="src/"):
440+
def document_object(
441+
object_name, package, page_info, full_name=True, anchor_name=None, version_tag_suffix="src/", is_svelte=True
442+
):
359443
"""
360444
Writes the document of a function, class or method.
361445
@@ -410,11 +494,18 @@ def document_object(object_name, package, page_info, full_name=True, anchor_name
410494
# tokenizers obj do NOT have `__module__` attribute & can NOT be used with inspect.getsourcelines
411495
source_link = None
412496
is_getset_desc = is_getset_descriptor(obj)
413-
component = get_signature_component(
414-
signature_name, anchor_name, signature, object_doc, source_link, is_getset_desc
415-
)
416-
documentation = "\n" + component + "\n"
417-
return documentation, check
497+
if is_svelte:
498+
documentation = get_signature_component_svelte(
499+
signature_name, anchor_name, signature, object_doc, source_link, is_getset_desc
500+
)
501+
documentation = "\n" + documentation + "\n"
502+
return documentation, check
503+
else:
504+
# markdown
505+
documentation = get_signature_component_markdown(
506+
signature_name, anchor_name, signature, object_doc, source_link, is_getset_desc
507+
)
508+
return documentation, anchor_name, check
418509

419510

420511
def find_documented_methods(clas):
@@ -446,7 +537,9 @@ def find_documented_methods(clas):
446537
docstring_css_classes = "docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"
447538

448539

449-
def autodoc(object_name, package, methods=None, return_anchors=False, page_info=None, version_tag_suffix="src/"):
540+
def autodoc_svelte(
541+
object_name, package, methods=None, return_anchors=False, page_info=None, version_tag_suffix="src/"
542+
):
450543
"""
451544
Generates the documentation of an object, with a potential filtering on the methods for a class.
452545
@@ -520,6 +613,95 @@ def autodoc(object_name, package, methods=None, return_anchors=False, page_info=
520613
return (documentation, anchors, errors) if return_anchors else documentation
521614

522615

616+
def autodoc_markdown(
    object_name, package, methods=None, return_anchors=False, page_info=None, version_tag_suffix="src/"
):
    """
    Generates the markdown documentation of an object, with a potential filtering on the methods for a class.

    Args:
        object_name (`str`): The name of the function or class to document.
        package (`types.ModuleType`): The package of the object.
        methods (`List[str]`, *optional*):
            A list of methods to document if `obj` is a class. If nothing is passed, all public methods with a new
            docstring compared to the superclasses are documented. If a list of methods is passed and you want to
            add all those methods, the key "all" will add them. The key "none" documents no method at all. The
            list passed by the caller is not modified.
        return_anchors (`bool`, *optional*, defaults to `False`):
            Whether or not to return the list of anchors generated.
        page_info (`Dict[str, str]`, *optional*): Some information about the page.
        version_tag_suffix (`str`, *optional*, defaults to `"src/"`):
            Suffix to add after the version tag (e.g. 1.3.0 or main) in the documentation links.
            For example, the default `"src/"` suffix will result in a base link as `https://github.com/{repo_owner}/{package_name}/blob/{version_tag}/src/`.
            For example, `version_tag_suffix=""` will result in a base link as `https://github.com/{repo_owner}/{package_name}/blob/{version_tag}/`.

    Returns:
        A list of `{"doc": ..., "anchor_name": ...}` dictionaries, the first one for the object itself followed by
        one per documented method. If `return_anchors=True`, returns the tuple `(object_docs, anchors, errors)`
        instead.
    """
    if page_info is None:
        page_info = {}
    if "package_name" not in page_info:
        page_info["package_name"] = package.__name__

    errors = []
    obj = find_object_in_package(object_name=object_name, package=package)
    documentation, anchor_name, check = document_object(
        object_name=object_name,
        package=package,
        page_info=page_info,
        version_tag_suffix=version_tag_suffix,
        is_svelte=False,
    )
    object_docs = [{"doc": documentation, "anchor_name": anchor_name}]
    if check is not None:
        errors.append(check)

    is_class = isinstance(obj, type)
    anchors = None
    # The base anchor is needed to build method anchors even when the caller did not ask for the anchors,
    # so it is computed for every class and not only when `return_anchors` is set.
    if return_anchors or is_class:
        anchors = [get_shortest_path(obj, package)]

    if is_class:
        # The class itself is already documented above; documenting it a second time with the same arguments
        # would only repeat the work and duplicate its entry in `errors`.
        if methods is None:
            methods = find_documented_methods(obj)
        elif "all" in methods:
            # Work on a copy so the caller's list is left untouched.
            methods = list(methods)
            methods.remove("all")
            documented = find_documented_methods(obj)
            methods.extend([m for m in documented if m not in methods])
        elif "none" in methods:
            methods = []

        for method_name in methods:
            method_doc, method_anchor, check = document_object(
                object_name=f"{object_name}.{method_name}",
                package=package,
                page_info=page_info,
                full_name=False,
                anchor_name=f"{anchors[0]}.{method_name}",
                version_tag_suffix=version_tag_suffix,
                is_svelte=False,
            )
            if check is not None:
                errors.append(check)
            object_docs.append({"doc": method_doc, "anchor_name": method_anchor})

            if return_anchors:
                # The anchor of the method may differ from the shortest path found for the method object;
                # when it does, both are recorded as a tuple.
                method_obj = find_object_in_package(f"{anchors[0]}.{method_name}", package=package)
                shortest_path = get_shortest_path(method_obj, package=package)
                if shortest_path is None or method_anchor == shortest_path:
                    anchors.append(method_anchor)
                else:
                    anchors.append((method_anchor, shortest_path))

    return (object_docs, anchors, errors) if return_anchors else object_docs
703+
704+
523705
def resolve_links_in_text(text, package, mapping, page_info):
524706
"""
525707
Resolve links of the form [`SomeClass`] to the link in the documentation to `SomeClass`.

src/doc_builder/build_doc.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import yaml
2525
from tqdm import tqdm
2626

27-
from .autodoc import autodoc, find_object_in_package, get_source_path, resolve_links_in_text
27+
from .autodoc import autodoc_svelte, find_object_in_package, get_source_path, resolve_links_in_text
2828
from .convert_md_to_mdx import convert_md_to_mdx
2929
from .convert_rst_to_mdx import convert_rst_to_mdx, find_indent, is_empty_line
3030
from .convert_to_notebook import generate_notebooks_from_file
@@ -120,7 +120,7 @@ def resolve_autodoc(content, package, return_anchors=False, page_info=None, vers
120120
break
121121
else:
122122
methods = None
123-
doc = autodoc(
123+
doc = autodoc_svelte(
124124
object_name,
125125
package,
126126
methods=methods,

0 commit comments

Comments
 (0)