Skip to content

📝 Add docstrings to Parse #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions BetterMD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
import logging
from .elements import A, H1, H2, H3, H4, H5, H6, Head, OL, UL, LI, Text, Div, P, Span, Img, B, I, Br, Blockquote, Hr, Table, Tr, Td, Th, THead, TBody, Input, Code
from .html import CustomHTML
from .markdown import CustomMarkdown
from .rst import CustomRst
from .elements import *
from .parse import Collection, HTMLParser, MDParser, RSTParser

def from_html(html:'str'):
    """
    Convert an HTML string by delegating to ``Symbol.from_html``.

    Args:
        html: The HTML content to convert.

    Returns:
        The value returned by ``Symbol.from_html(html)``, passed through
        unchanged (presumably a Symbol; confirm against ``Symbol.from_html``).
    """
    return Symbol.from_html(html)

def enable_debug_mode():
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("BetterMD")
def from_md(md:'str'):
    """
    Convert a Markdown string by delegating to ``Symbol.from_md``.

    Args:
        md (str): The Markdown content to be converted.

    Returns:
        The value returned by ``Symbol.from_md(md)``, passed through
        unchanged (presumably a Symbol; confirm against ``Symbol.from_md``).
    """
    return Symbol.from_md(md)
110 changes: 98 additions & 12 deletions BetterMD/elements/a.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,109 @@
from BetterMD.rst.custom_rst import CustomRst
from .symbol import Symbol
from ..rst import CustomRst
from ..markdown import CustomMarkdown
from ..html import CustomHTML
import re
import typing as t

class MD(CustomMarkdown['A']):
def to_md(self, inner, symbol, parent, **kwargs):
return f"[{" ".join([e.to_md(**kwargs) for e in inner])}]({symbol.get_prop("href")})"
if t.TYPE_CHECKING:
from ..parse import Collection

class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent):
    """
    Render the element as an inline Markdown link.

    The link text is the Markdown of every item in ``inner`` joined with a
    single space; the destination is the symbol's ``href`` property.

    Args:
        inner: Iterable of child elements, each exposing ``to_md()``.
        symbol: Object whose ``get_prop("href")`` supplies the link target.
        parent: Accepted for interface compatibility; not used here.

    Returns:
        A string of the form ``[text](href)``.
    """
    link_text = " ".join(child.to_md() for child in inner)
    destination = symbol.get_prop("href")
    return f"[{link_text}]({destination})"

def verify(self, text:'str'):
    """
    Check whether the given text contains a Markdown link.

    Three forms are recognised, and only for ``http://`` / ``https://``
    targets:

    * inline links: ``[text](url)``
    * automatic links: ``<url>``
    * reference links: ``[text][ref]`` directly followed by a
      ``[ref]: url`` definition

    Args:
        text: The text to analyze for Markdown link formats.

    Returns:
        True if any of the link forms is found, False otherwise.
    """
    # Raw strings keep the regex escapes (\[, \s, ...) out of Python's own
    # string-escape processing, which warns on unknown escapes.
    link_patterns = (
        # Case 1: Inline link
        r"\[([^\]]+)\]\((https?://[^\s)]+)\)",
        # Case 2: Automatic link
        r"<(https?://[^\s>]+)>",
        # Case 3: Reference link followed by its definition
        r"\[([^\]]+)\]\[([^\]]+)\]\s*\n?\[([^\]]+)\]:\s*(https?://[^\s]+)",
    )
    # Only existence matters, so stop at the first match instead of
    # collecting every occurrence.
    return any(re.search(pattern, text) for pattern in link_patterns)

class HTML(CustomHTML['A']):
def to_html(self, inner, symbol, parent, **kwargs):
return f"<a href={symbol.get_prop('href')}>{" ".join([e.to_html(**kwargs) for e in inner])}</a>"

class RST(CustomRst['A']):
def to_rst(self, inner, symbol, parent, **kwargs):
return f"`{' '.join([e.to_rst(**kwargs) for e in inner])} <{symbol.get_prop('href')}>`_"
def to_rst(self, inner, symbol, parent):
    """
    Render the element as a reStructuredText hyperlink.

    The link text is the RST of every item in ``inner`` joined with a single
    space; the target is the symbol's ``href`` property.

    Args:
        inner: Iterable of child elements, each exposing ``to_rst()``.
        symbol: Object whose ``get_prop('href')`` supplies the link target.
        parent: Accepted for interface compatibility; not used here.

    Returns:
        A string of the form `` `text <href>`_ ``.
    """
    link_text = " ".join(child.to_rst() for child in inner)
    target = symbol.get_prop('href')
    return f"`{link_text} <{target}>`_"

class A(Symbol):
prop_list = ["href"]

refs = {}
md = MD()
html = HTML()
rst = RST()
html = "a"
rst = RST()

@classmethod
def md_refs(cls, references: 'list[str]' = None):
    """
    Placeholder hook for Markdown link references.

    The body is empty: ``references`` is accepted but ignored, and the
    method returns None.

    Args:
        references: Optional list of reference strings; currently unused.
    """
    pass

@classmethod
def rst_refs(cls, references: 'list[str]' = None):
    """
    Placeholder hook for reStructuredText link references.

    The body is empty: ``references`` is accepted but ignored, and the
    method returns None.

    Args:
        references: Optional list of reference strings; currently unused.
    """
    pass

@classmethod
def html_refs(cls, references: 'list[str]' = None):
    """
    Placeholder hook for HTML link references.

    The body is empty: ``references`` is accepted but ignored, and the
    method returns None.

    Args:
        references: Optional list of reference strings; currently unused.
    """
    pass
107 changes: 94 additions & 13 deletions BetterMD/elements/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,115 @@
from .text import Text
from ..markdown import CustomMarkdown
from ..html import CustomHTML
from ..rst import CustomRst

class MD(CustomMarkdown['Code']):
def to_md(self, inner, symbol, parent, **kwargs):
language = symbol.get_prop("language", "")
class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent):
"""
Convert content to Markdown code.

content = " ".join([e.to_md(**kwargs) for e in inner])
Transforms the provided content into its Markdown representation. If a language is specified
via the symbol or the content contains newline characters, the content is wrapped in a fenced
code block with the optional language identifier; otherwise, it is formatted as inline code.
If the input is a Text instance, it is first converted to Markdown.

Returns:
str: The Markdown-formatted code.
"""
language = symbol.get_prop("language", "")
if isinstance(inner, Text):
inner = inner.to_md()

# If it's a code block (has language or multiline)
if language or "\n" in inner:
return f"```{language}\n{content}\n```\n"
return f"```{language}\n{inner}\n```\n"

# Inline code
return f"`{content}`"
return f"`{inner}`"

class HTML(CustomHTML):
def to_html(self, inner, symbol, parent, **kwargs):
language = symbol.get_prop("language", "")
def to_html(self, inner, symbol, parent):
"""
Converts inner elements to an HTML code element with optional syntax highlighting.

content = " ".join([e.to_html(**kwargs) for e in inner])
Joins the HTML representations of each inner element with newline characters and
wraps the result in a <code> tag. If a programming language is specified in the symbol's
properties, the code element is assigned a language-specific class.
"""
language = symbol.get_prop("language", "")
inner = "\n".join([i.to_html() for i in inner])

if language:
return f'<pre><code class="language-{language}">{content}</code></pre>'
return f'<code class="language-{language}">{inner}</code>'

return f"<code>{inner}</code>"

def verify(self, text: str) -> bool:
    """
    Report whether ``text`` equals "code" when compared case-insensitively.

    Args:
        text: The string to compare against the keyword "code".

    Returns:
        True if the lower-cased text is exactly "code", otherwise False.
    """
    normalized = text.lower()
    return normalized == "code"

class RST(CustomRst):
    def to_rst(self, inner, symbol, parent):
        """
        Render code content as reStructuredText.

        When the symbol has a ``language`` property or the content spans
        several lines, the content is emitted as an indented block: a
        ``.. code-block:: <language>`` directive if a language is set,
        otherwise a plain ``::`` literal block. Anything else is emitted as
        an inline literal wrapped in double backticks.

        Parameters:
            inner: Either a list of items or a single item. Symbol instances
                are rendered through ``to_rst()``; everything else through
                ``str()``. List items are concatenated with no separator.
            symbol: Object whose ``get_prop("language", "")`` selects the
                directive and the language name.
            parent: Accepted for interface compatibility; not used here.

        Returns:
            A string containing the content formatted in reStructuredText.
        """
        language = symbol.get_prop("language", "")

        # Flatten the inner content into one string.
        if isinstance(inner, list):
            content = "".join(
                item.to_rst() if isinstance(item, Symbol) else str(item)
                for item in inner
            )
        else:
            content = inner.to_rst() if isinstance(inner, Symbol) else str(inner)

        # Block form: a language was given or the content is multiline.
        if language or "\n" in content:
            # Both block flavours need the body indented relative to the
            # marker line; three spaces lines it up under the directive name.
            # NOTE(review): strip() also removes leading spaces from the
            # first line, which can skew snippets that start indented.
            indented_content = "\n".join(
                f"   {line}" for line in content.strip().split("\n")
            )
            marker = f".. code-block:: {language}" if language else "::"
            return f"{marker}\n\n{indented_content}\n\n"

        # Inline form: reST inline literals use double backticks. Single
        # backticks mean "interpreted text" and would not render as code.
        return f"``{content}``"

class Code(Symbol):
prop_list = ["language"]
html = HTML()
md = MD()
rst = "``"
rst = RST()
nl = True
58 changes: 37 additions & 21 deletions BetterMD/elements/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,48 @@
from ..markdown import CustomMarkdown
from ..rst import CustomRst

class HTML(CustomHTML):
    def to_html(self, inner, symbol, parent, **kwargs):
        """
        Render the symbol as a self-closing ``<input ... />`` tag.

        Every name in ``Input.props`` is looked up on the symbol with
        ``get_prop``. Falsy values are skipped, ``True`` is emitted as a bare
        attribute name, and any other value as ``name="value"``.

        Args:
            inner: Not used by this renderer.
            symbol: Object providing the attribute values via ``get_prop``.
            parent: Not used by this renderer.
            **kwargs: Accepted and ignored.

        Returns:
            The ``<input>`` tag as a string, attributes separated by spaces.
        """
        rendered = []
        for name in Input.props:
            current = symbol.get_prop(name)
            if not current:
                continue
            # current is truthy here, so a bool can only be True: emit the
            # bare attribute name (e.g. required, disabled).
            rendered.append(name if isinstance(current, bool) else f'{name}="{current}"')

        return "<input " + " ".join(rendered) + " />"

class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent, **kwargs):
def to_md(self, inner, symbol, parent):
"""
Convert an input element to its Markdown representation.

If the symbol represents a checkbox, returns a Markdown-formatted checklist item
with an 'x' when checked (or a space when unchecked) followed by the rendered inner content.
For other input types, the element’s HTML representation is returned.

Parameters:
inner: An object with a to_md() method that renders inner content.
symbol: An element descriptor whose 'type' property determines rendering; if its 'type'
is "checkbox", the 'checked' property is used to indicate its state.
parent: The parent element context (unused in this conversion).

Returns:
A string containing either the Markdown or HTML representation of the input element.
"""
if symbol.get_prop("type") == "checkbox":
return f"- [{'x' if symbol.get_prop('checked', '') else ''}] {inner.to_md()}"
return f"- [{'x' if symbol.get_prop('checked', '') else ' '}] {inner.to_md()}"
return symbol.to_html()

class RST(CustomRst):
def to_rst(self, inner, symbol, parent, **kwargs):
def to_rst(self, inner, symbol, parent):
"""
Return a reStructuredText representation of a checkbox input element.

If the symbol's "type" property is "checkbox", formats a checkbox with an "x"
if checked (or a space if not) and appends any nested content rendered via its
to_rst method. For input types other than checkbox, returns an empty string.

Args:
inner: An optional element to be rendered in RST, if provided.
symbol: The input element symbol whose properties determine formatting.
parent: The parent element; not used in this conversion.

Returns:
A string with the RST representation of the checkbox input, or an empty
string.
"""
if symbol.get_prop("type") == "checkbox":
return f"[ ] {inner.to_rst() if inner else ''}"
return f"[{'x' if symbol.get_prop('checked', '') else ' '}] {inner.to_rst() if inner else ''}"
return "" # Most input types don't have RST equivalents

class Input(Symbol):
Expand All @@ -50,6 +66,6 @@ class Input(Symbol):
"multiple",
"step"
]
html = HTML()
html = "input"
md = MD()
rst = RST()
Loading