Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions BetterMD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import logging
from .elements import A, H1, H2, H3, H4, H5, H6, Head, OL, UL, LI, Text, Div, P, Span, Img, B, I, Br, Blockquote, Hr, Table, Tr, Td, Th, THead, TBody, Input, Code
from .html import CustomHTML
from .markdown import CustomMarkdown
from .rst import CustomRst
from .elements import *
from .parse import Collection, HTMLParser, MDParser, RSTParser

def from_html(html:'str'):
    """
    Build a Symbol from HTML source text.

    The string is handed to ``Symbol.from_html`` and that call's result is
    returned unchanged.
    """
    symbol = Symbol.from_html(html)
    return symbol

def enable_debug_mode():
    """Call ``logging.basicConfig`` with the level set to ``logging.DEBUG``."""
    debug_level = logging.DEBUG
    logging.basicConfig(level=debug_level)
# Package logger, registered under the name "BetterMD".
logger = logging.getLogger("BetterMD")
def from_md(md:'str'):
    """
    Build a Symbol from Markdown source text.

    The string is handed to ``Symbol.from_md`` and that call's result is
    returned unchanged.

    Args:
        md (str): Markdown formatted text.

    Returns:
        Whatever ``Symbol.from_md`` produces for the given text.
    """
    symbol = Symbol.from_md(md)
    return symbol
125 changes: 113 additions & 12 deletions BetterMD/elements/a.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,124 @@
from BetterMD.rst.custom_rst import CustomRst
from .symbol import Symbol
from ..rst import CustomRst
from ..markdown import CustomMarkdown
from ..html import CustomHTML
import re
import typing as t

class MD(CustomMarkdown['A']):
def to_md(self, inner, symbol, parent, **kwargs):
    """
    Render the anchor as an inline Markdown link, ``[text](href)``.

    Args:
        inner: Iterable of child elements; each must expose ``to_md(**kwargs)``.
        symbol: Element that supplies the link target via ``get_prop("href")``.
        parent: Parent element; not used here.
        **kwargs: Forwarded unchanged to every child's ``to_md``.

    Returns:
        str: The children's Markdown joined by single spaces, in brackets,
        followed by the href in parentheses.
    """
    # Built outside the f-string: reusing the enclosing quote character inside
    # a replacement field is a SyntaxError on interpreters older than 3.12.
    label = " ".join(e.to_md(**kwargs) for e in inner)
    href = symbol.get_prop("href")
    return f"[{label}]({href})"
if t.TYPE_CHECKING:
from ..parse import Collection

class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent, **kwargs):
    """
    Convert a list of inner elements to a Markdown link.

    Joins the Markdown representations of the inner elements with spaces and
    uses the symbol's ``href`` property as the link destination.

    Args:
        inner: Iterable of elements, each having a ``to_md()`` method.
        symbol: Object that provides the link destination via ``get_prop("href")``.
        parent: The parent element context; unused.
        **kwargs: Optional rendering options, forwarded to each child's
            ``to_md``. Accepted so both call styles used in this package work.

    Returns:
        str: A Markdown link of the form ``[text](href)``.
    """
    # Kept out of the f-string: nesting the same quote character inside a
    # replacement field only parses on Python 3.12 and later.
    link_text = " ".join(child.to_md(**kwargs) for child in inner)
    destination = symbol.get_prop("href")
    return f"[{link_text}]({destination})"

def verify(self, text:'str'):
    """
    Check whether the text contains a Markdown link with an http(s) URL.

    Three link styles are recognised:
      * inline links, e.g. ``[label](https://example.com)``
      * automatic links, e.g. ``<https://example.com>``
      * reference links, e.g. ``[label][ref]`` directly followed by a
        ``[ref]: https://example.com`` definition

    Args:
        text: The text to search for Markdown link patterns.

    Returns:
        bool: True if any of the patterns occurs in the text, otherwise False.
    """
    # Raw strings keep the regex escapes (\[, \s, ...) away from Python's own
    # string-escape processing, which warns about them in non-raw literals.
    link_patterns = (
        r"\[([^\]]+)\]\((https?://[^\s)]+)\)",  # inline link
        r"<(https?://[^\s>]+)>",  # automatic link
        r"\[([^\]]+)\]\[([^\]]+)\]\s*\n?\[([^\]]+)\]:\s*(https?://[^\s]+)",  # reference link
    )
    # search() stops at the first hit; the matches themselves are not needed.
    return any(re.search(pattern, text) for pattern in link_patterns)

class HTML(CustomHTML['A']):
def to_html(self, inner, symbol, parent, **kwargs):
    """
    Render the anchor as an HTML ``<a>`` element.

    Args:
        inner: Iterable of child elements; each must expose ``to_html(**kwargs)``.
        symbol: Element that supplies the link target via ``get_prop('href')``.
        parent: Parent element; not used here.
        **kwargs: Forwarded unchanged to every child's ``to_html``.

    Returns:
        str: ``<a href="...">children</a>`` with the children's HTML joined by
        single spaces.
    """
    # Function-scope import of the standard-library helper.
    from html import escape

    body = " ".join(child.to_html(**kwargs) for child in inner)
    # The attribute value is quoted and escaped: an unquoted value ends at the
    # first space or ">" and would produce broken markup.
    href = escape(str(symbol.get_prop("href")), quote=True)
    return f'<a href="{href}">{body}</a>'

class RST(CustomRst['A']):
def to_rst(self, inner, symbol, parent, **kwargs):
    """
    Render the anchor as an RST hyperlink: `` `text <href>`_ ``.

    Each child's ``to_rst(**kwargs)`` output is joined with single spaces to
    form the link text; the target comes from ``symbol.get_prop('href')``.
    ``parent`` is not used.
    """
    pieces = [e.to_rst(**kwargs) for e in inner]
    label = ' '.join(pieces)
    target = symbol.get_prop('href')
    return f"`{label} <{target}>`_"
def to_rst(self, inner, symbol, parent):
    """
    Build a reStructuredText hyperlink from the inner elements.

    Args:
        inner: Iterable of elements having a ``to_rst()`` method; their output
            is joined with single spaces to form the link text.
        symbol: Object that provides the URL via ``get_prop('href')``.
        parent: The parent element; unused.

    Returns:
        str: The link in the form `` `text <url>`_ ``.
    """
    rendered_children = (child.to_rst() for child in inner)
    link_text = ' '.join(rendered_children)
    url = symbol.get_prop('href')
    return f"`{link_text} <{url}>`_"

class A(Symbol):
    """Anchor (link) element whose only declared property is ``href``."""

    prop_list = ["href"]

    # Class-level mapping shared by all instances; nothing in this class
    # writes to it.
    refs = {}

    # Per-format renderers. ``html`` and ``rst`` were each assigned twice in a
    # row; only the final values are kept, since the earlier ones were
    # overwritten immediately and never read.
    md = MD()
    html = "a"
    rst = RST()

    @classmethod
    def md_refs(cls, references: 'list[str] | None' = None):
        """
        Placeholder hook for Markdown references; currently does nothing.

        Args:
            references: Optional list of Markdown reference strings.
        """

    @classmethod
    def rst_refs(cls, references: 'list[str] | None' = None):
        """
        Placeholder hook for reStructuredText references; currently does nothing.

        Args:
            references: Optional list of reference identifiers.
        """

    @classmethod
    def html_refs(cls, references: 'list[str] | None' = None):
        """
        Placeholder hook for HTML references; currently does nothing.

        Args:
            references: Optional list of HTML reference strings.
        """
107 changes: 94 additions & 13 deletions BetterMD/elements/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,115 @@
from .text import Text
from ..markdown import CustomMarkdown
from ..html import CustomHTML
from ..rst import CustomRst

class MD(CustomMarkdown['Code']):
def to_md(self, inner, symbol, parent, **kwargs):
language = symbol.get_prop("language", "")
class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent, **kwargs):
    """
    Render code content as Markdown: a fenced block or an inline span.

    A fenced block (triple backticks) is produced when the symbol has a
    ``language`` property or the content contains a newline; otherwise the
    content is wrapped in single backticks.

    Args:
        inner: The code content. May be a plain string, a single element
            exposing ``to_md``, or an iterable of such elements / strings
            (joined with single spaces).
        symbol: Object whose ``get_prop("language", "")`` selects the fence
            language.
        parent: The parent context element; unused.
        **kwargs: Optional rendering options, forwarded to ``to_md`` calls.

    Returns:
        str: The Markdown for the code block or inline code.
    """
    language = symbol.get_prop("language", "")

    # Normalise every accepted input shape to one string up front, so the
    # checks and both return paths work on the same, always-bound value.
    if isinstance(inner, str):
        content = inner
    elif hasattr(inner, "to_md"):
        content = inner.to_md(**kwargs)
    else:
        content = " ".join(
            e.to_md(**kwargs) if hasattr(e, "to_md") else str(e) for e in inner
        )

    # If it's a code block (has language or multiline)
    if language or "\n" in content:
        return f"```{language}\n{content}\n```\n"

    # Inline code
    return f"`{content}`"

class HTML(CustomHTML):
def to_html(self, inner, symbol, parent, **kwargs):
language = symbol.get_prop("language", "")
def to_html(self, inner, symbol, parent, **kwargs):
    """
    Render a collection of content elements as HTML code markup.

    The children's HTML is joined with newlines. With a ``language`` property
    the result is ``<pre><code class="language-...">...</code></pre>``;
    without one it is a bare ``<code>...</code>`` element.

    Args:
        inner: Iterable of elements exposing ``to_html``.
        symbol: Object whose ``get_prop("language", "")`` names the language.
        parent: The parent element; unused.
        **kwargs: Optional rendering options, forwarded to each ``to_html``.

    Returns:
        str: The HTML markup for the code.
    """
    language = symbol.get_prop("language", "")
    # Bound under its own name so both return paths read a defined value and
    # the ``inner`` parameter is not rebound to a different type.
    content = "\n".join(item.to_html(**kwargs) for item in inner)

    if language:
        return f'<pre><code class="language-{language}">{content}</code></pre>'

    return f"<code>{content}</code>"

def verify(self, text: str) -> bool:
    """
    Report whether the text, once lower-cased, is exactly ``"code"``.

    Args:
        text: The text to check.

    Returns:
        bool: True for "code" in any letter case, otherwise False.
    """
    lowered = text.lower()
    return "code" == lowered

class RST(CustomRst):
def to_rst(self, inner, symbol, parent, **kwargs):
    """
    Render code content as reStructuredText.

    When the symbol has a ``language`` property or the content spans several
    lines, the stripped content is indented and emitted as a block: under a
    ``.. code-block:: <language>`` directive if a language is given, otherwise
    as a ``::`` literal block. Single-line content without a language is
    wrapped in backticks (doubled when the content itself contains a backtick).

    Args:
        inner: The content to convert: a list of elements / strings (joined
            without a separator) or a single element / value.
        symbol: Object whose ``get_prop("language", "")`` names the language.
        parent: Unused; present for interface compatibility.
        **kwargs: Optional rendering options, forwarded to ``to_rst`` calls.

    Returns:
        str: The reStructuredText for the code block or inline code.
    """
    language = symbol.get_prop("language", "")

    # Handle inner content. Anything exposing ``to_rst`` is rendered through
    # it; every other value is converted with ``str``.
    if isinstance(inner, list):
        content = "".join(
            item.to_rst(**kwargs) if hasattr(item, "to_rst") else str(item)
            for item in inner
        )
    elif hasattr(inner, "to_rst"):
        content = inner.to_rst(**kwargs)
    else:
        content = str(inner)

    # If it's a code block (has language or multiline)
    if language or "\n" in content:
        # Block bodies are indented by three spaces, aligning them with the
        # directive name after ".. ". Both block kinds share this step.
        indented_content = "\n".join(
            f"   {line}" for line in content.strip().split("\n")
        )
        # code-block directive for language-specific blocks, plain literal
        # block otherwise.
        header = f".. code-block:: {language}" if language else "::"
        return f"{header}\n\n{indented_content}\n\n"

    # Inline code; double the delimiters when the content contains a backtick.
    # NOTE(review): in RST single backticks are interpreted text, not an
    # inline literal - confirm whether ``...`` should always be used here.
    if "`" in content:
        return f"``{content}``"
    return f"`{content}`"

class Code(Symbol):
    """Code element (inline or block) with an optional ``language`` property."""

    prop_list = ["language"]

    # Per-format renderers. ``rst`` was first set to the string "``" and then
    # overwritten on the very next line; only the effective value is kept.
    html = HTML()
    md = MD()
    rst = RST()

    nl = True
41 changes: 20 additions & 21 deletions BetterMD/elements/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,31 @@
from ..markdown import CustomMarkdown
from ..rst import CustomRst

class HTML(CustomHTML):
def to_html(self, inner, symbol, parent, **kwargs):
    """
    Render the input element as a self-closing ``<input ... />`` tag.

    Every name in ``Input.props`` is looked up on the symbol; properties with
    a falsy value are left out. ``inner``, ``parent`` and ``kwargs`` are not
    used.
    """
    def render(name, value):
        # Only truthy values reach this point, so a bool here is always True
        # and is written as a bare boolean attribute ('required', 'disabled').
        return name if isinstance(value, bool) else f'{name}="{value}"'

    # Collect all input attributes
    looked_up = ((name, symbol.get_prop(name)) for name in Input.props)
    rendered = [render(name, value) for name, value in looked_up if value]
    return "<input " + " ".join(rendered) + " />"

class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent, **kwargs):
    """
    Convert an input symbol into its Markdown representation.

    A checkbox becomes a task-list item: ``- [x] <inner>`` when the symbol's
    ``checked`` property is truthy and ``- [ ] <inner>`` otherwise. Any other
    input type falls back to the symbol's HTML representation.

    Args:
        inner: Element exposing ``to_md()``; used as the item label.
        symbol: The input element; read via ``get_prop`` and ``to_html``.
        parent: The parent element; unused.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        str: The Markdown task-list item, or the symbol's HTML.
    """
    if symbol.get_prop("type") != "checkbox":
        return symbol.to_html()

    # An unchecked box must contain a single space: "[ ]" is a task-list
    # marker, whereas "[]" is rendered as literal text.
    mark = "x" if symbol.get_prop("checked", "") else " "
    return f"- [{mark}] {inner.to_md()}"

class RST(CustomRst):
def to_rst(self, inner, symbol, parent, **kwargs):
    """
    Generate the RST text for a checkbox input element.

    For a symbol whose ``type`` property is ``"checkbox"`` the result is
    ``[x]`` (``checked`` truthy) or ``[ ]`` (otherwise), a space, and the
    inner element's RST when an inner element is given. All other input types
    produce an empty string.

    Args:
        inner: Optional element exposing ``to_rst()``; used as the label.
        symbol: The input element; read via ``get_prop``.
        parent: The parent element; unused.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        str: The checkbox text, or ``""`` for non-checkbox inputs.
    """
    if symbol.get_prop("type") != "checkbox":
        return ""  # Most input types don't have RST equivalents

    # Reflect the checked state instead of always printing an empty box.
    mark = "x" if symbol.get_prop("checked", "") else " "
    label = inner.to_rst() if inner else ""
    return f"[{mark}] {label}"

class Input(Symbol):
Expand All @@ -50,6 +49,6 @@ class Input(Symbol):
"multiple",
"step"
]
html = HTML()
html = "input"
md = MD()
rst = RST()
Loading