Skip to content

Push v0.3.3 to main #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,3 @@ jobs:

- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1

- name: Publish package distributions to TestPyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
repository-url: https://test.pypi.org/legacy/
16 changes: 8 additions & 8 deletions BetterMD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .elements import *
from . import elements
from .html import CustomHTML
from .markdown import CustomMarkdown
from .rst import CustomRst
Expand All @@ -13,7 +13,7 @@ def read(self) -> str: ...
class HTML:
@staticmethod
def from_string(html:'str'):
return Symbol.from_html(html)
return elements.Symbol.from_html(html)

@staticmethod
def from_file(file: 'Readable'):
Expand All @@ -22,7 +22,7 @@ def from_file(file: 'Readable'):
except Exception as e:
raise IOError(f"Error reading HTML file: {e}")

return Symbol.from_html(text)
return elements.Symbol.from_html(text)

@staticmethod
def from_url(url:'str'):
Expand All @@ -35,7 +35,7 @@ def from_url(url:'str'):
except Exception as e:
raise IOError(f"Error reading HTML from URL: {e}")

ret = Symbol.from_html(text)
ret = elements.Symbol.from_html(text)

if len(ret) == 1:
return ret[0]
Expand All @@ -45,7 +45,7 @@ def from_url(url:'str'):
class MD:
@staticmethod
def from_string(md:'str'):
return Symbol.from_md(md)
return elements.Symbol.from_md(md)

@staticmethod
def from_file(file: 'Readable'):
Expand All @@ -54,7 +54,7 @@ def from_file(file: 'Readable'):
except Exception as e:
raise IOError(f"Error reading Markdown file: {e}")

return Symbol.from_md(text)
return elements.Symbol.from_md(text)

@staticmethod
def from_url(url):
Expand All @@ -64,7 +64,7 @@ def from_url(url):
except Exception as e:
raise IOError(f"Error reading Markdown from URL: {e}")

return Symbol.from_md(text)
return elements.Symbol.from_md(text)


__all__ = ["HTML", "MD", "Symbol", "Collection", "HTMLParser", "MDParser", "CustomHTML", "CustomMarkdown", "CustomRst", "enable_debug_mode"]
__all__ = ["HTML", "MD", "elements", "Collection", "HTMLParser", "MDParser", "CustomHTML", "CustomMarkdown", "CustomRst", "enable_debug_mode"]
2 changes: 1 addition & 1 deletion BetterMD/elements/col.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ class Col(Symbol):
md = ""
html = "col"
rst = ""
self_closing = True
type = "void"
123 changes: 109 additions & 14 deletions BetterMD/elements/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

if t.TYPE_CHECKING:
from .symbol import Symbol
from ..typing import ATTR_TYPES

T1 = t.TypeVar("T1")
T2 = t.TypeVar("T2")
Expand All @@ -10,6 +11,40 @@

ARGS = t.ParamSpec("ARGS")

class HashableList(t.Generic[T1]):
    """Wrap a list so that it can be used as a dictionary key.

    Hashing and equality are both derived from the wrapped list's contents.
    Hashing raises ``TypeError`` if any item of the list is itself unhashable.
    """

    def __init__(self, lst:'list[T1]'):
        self.lst = lst

    def __repr__(self):
        return f"HashableList({self.lst})"

    def __eq__(self, other):
        # Only another HashableList can compare equal; then the wrapped
        # lists decide.
        return isinstance(other, HashableList) and self.lst == other.lst

    def __hash__(self):
        # Lists are unhashable, so hash an immutable snapshot of the items.
        snapshot = tuple(self.lst)
        return hash(snapshot)


class HashableDict(t.Generic[T1, T2]):
    """Wrap a dict so that it can be used as a dictionary key.

    Equality is plain dict equality on the wrapped mapping, which ignores
    insertion order. The hash is therefore computed from an unordered view
    of the items so that equal wrappers always hash the same.

    Hashing raises ``TypeError`` if any value of the dict is unhashable.
    """

    def __init__(self, dct:'dict[T1, T2]'):
        self.dct = dct

    def __hash__(self):
        # A frozenset (not a tuple) of the items keeps the hash independent
        # of insertion order, matching the order-insensitive __eq__ below.
        return hash(frozenset(self.dct.items()))

    def __eq__(self, other):
        if not isinstance(other, HashableDict):
            return False
        return self.dct == other.dct

    def __repr__(self):
        return f"HashableDict({self.dct})"


class GetProtocol(t.Protocol, t.Generic[T1, T2]):
def get(self, key: 'T1', ) -> 'T2': ...

Expand All @@ -20,18 +55,21 @@ def copy(self) -> 'T1': ...
class Copy:
def __init__(self, data):
self.data = data

def copy(self):
return self.data

T5 = t.TypeVar("T5", bound=CopyProtocol)
HASHABLE_ATTRS = str | bool | int | float | HashableList['HASHABLE_ATTRS'] | HashableDict[str, 'HASHABLE_ATTRS']

class Fetcher(t.Generic[T1, T2, T5]):
def __init__(self, data: 'GetProtocol[T1, T2]', default:'T5'=Copy(None)):
def __init__(self, data: 't.Union[GetProtocol[T1, T2], dict[T1, T2]]', default:'T5'=Copy(None)):
self.data = data
self.default = default.copy() if isinstance(default, CopyProtocol) else default

Comment on lines +66 to 69
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Avoid calling Copy() in default argument (B008)

Default arguments are evaluated at import time. Replace with None and
initialise inside the function to prevent accidental state sharing.

-def __init__(self, data: ..., default: T5 = Copy(None)):
+def __init__(self, data: ..., default: T5 | None = None):
     self.data = data
-    self.default = default.copy() if isinstance(default, CopyProtocol) else default
+    default = default or Copy(None)
+    self.default = default.copy() if isinstance(default, CopyProtocol) else default

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Ruff (0.8.2)

66-66: Do not perform function call Copy in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable

(B008)

def __getitem__(self, name:'T1') -> 'T2|T5':
if isinstance(self.data, dict):
return self.data.get(name, self.default)
return self.data.get(name, self.default)
class InnerHTML:
def __init__(self, inner):
Expand All @@ -40,35 +78,54 @@ def __init__(self, inner):
self.ids: 'dict[str|None, list[Symbol]]' = {}
self.classes: 'dict[str, list[Symbol]]' = {}
self.tags: 'dict[type[Symbol], list[Symbol]]' = {}
self.attrs: 'dict[str, dict[HASHABLE_ATTRS, list[Symbol]]]' = {}
self.text: 'dict[str, list[Symbol]]' = {}

self.children_ids: 'dict[str|None, list[Symbol]]' = {}
self.children_classes: 'dict[str, list[Symbol]]' = {}
self.children_tags: 'dict[type[Symbol], list[Symbol]]' = {}
self.children_attrs: 'dict[str, dict[str, list[Symbol]]]' = {}
self.children_text: 'dict[str, list[Symbol]]' = {}

def add_elm(self, elm:'Symbol'):
"""
Add an element to the children indexes and merge the element's own indexes
recursively into aggregate indexes.
def add_elm(self, elm: 'Symbol'):
def make_hashable(v):
if isinstance(v, list):
return HashableList(v)
elif isinstance(v, dict):
return HashableDict(v)
return v

Args:
elm: Symbol element to add to the indexes.
"""
self.children_ids.setdefault(elm.get_prop("id", None), []).append(elm)
[self.children_classes.setdefault(c, []).append(elm) for c in elm.classes]
self.children_tags.setdefault(type(elm), []).append(elm)

def concat(d1: 'dict[T1|T3, list[T2|T4]]', *d2: 'dict[T3, list[T4]]', **kwargs):
ret = {**kwargs}
# Normalize keys when adding to children_attrs
for prop, value in elm.props.items():
key = make_hashable(value)
self.children_attrs.setdefault(prop, {}).setdefault(key, []).append(elm)

for dict in list(d2) + [d1]:
for k, v in dict.items():
self.children_text.setdefault(elm.text, []).append(elm)

def concat(d1: 'dict', *d2: 'dict'):
ret = {}

for dict_ in list(d2) + [d1]:
for k, v in dict_.items():
ret.setdefault(k, []).extend(v)

return ret

# Normalize keys in elm.props for attrs merging
normalized_props = {
prop: {make_hashable(value): [elm] for value in values}
for prop, values in elm.props.items()
}

Comment on lines +118 to +123
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

normalized_props comprehension breaks for non-iterable attribute values

When a prop value is a string, iterating over it yields single characters,
producing nonsense keys; when it is an int, float or bool, the iteration raises a TypeError.

Consider:

-normalized_props = {
-    prop: {make_hashable(value): [elm] for value in values}
-    for prop, values in elm.props.items()
-}
+normalized_props: dict[str, dict[HASHABLE_ATTRS, list[Symbol]]] = {}
+for prop, val in elm.props.items():
+    key = make_hashable(val)
+    normalized_props.setdefault(prop, {})[key] = [elm]
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Normalize keys in elm.props for attrs merging
normalized_props = {
prop: {make_hashable(value): [elm] for value in values}
for prop, values in elm.props.items()
}
# Normalize keys in elm.props for attrs merging
normalized_props: dict[str, dict[HASHABLE_ATTRS, list[Symbol]]] = {}
for prop, val in elm.props.items():
key = make_hashable(val)
normalized_props.setdefault(prop, {})[key] = [elm]

self.ids = concat(self.ids, elm.inner_html.ids, {elm.get_prop("id", None): [elm]})
self.classes = concat(self.classes, elm.inner_html.classes, {c: [elm] for c in elm.classes})
self.tags = concat(self.tags, elm.inner_html.tags, {type(elm): [elm]})
self.attrs = concat(self.attrs, elm.inner_html.attrs, normalized_props)
self.text = concat(self.text, elm.inner_html.text, {elm.text: [elm]})

def get_elements_by_id(self, id: 'str'):
return self.ids.get(id, [])
Expand All @@ -77,7 +134,45 @@ def get_elements_by_class_name(self, class_name: 'str'):
return self.classes.get(class_name, [])

def get_elements_by_tag_name(self, tag: 'str'):
return self.tags.get(tag, [])
# Find the tag class by name
for tag_class, elements in self.tags.items():
if tag_class.__name__.lower() == tag.lower():
return elements
return []

def find(self, key:'str'):
if key.startswith("#"):
return self.get_elements_by_id(key[1:])
elif key.startswith("."):
return self.get_elements_by_class_name(key[1:])
else:
return self.get_elements_by_tag_name(key)

def get_by_text(self, text:'str'):
return self.text.get(text, [])

def get_by_attr(self, attr:'str', value:'str'):
return self.attrs.get(attr, {}).get(value, [])

Comment on lines +155 to +156
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

get_by_attr misses hash-normalisation

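Look-ups with list/dict values will fail because keys are stored in their
hashable wrappers (passing a raw list or dict as the key raises a TypeError, since it is unhashable).
Apply make_hashable before querying; note that make_hashable appears to be defined locally inside add_elm, so it must first be moved to module scope: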

-def get_by_attr(self, attr: str, value: str):
-    return self.attrs.get(attr, {}).get(value, [])
+def get_by_attr(self, attr: str, value):
+    key = value if isinstance(value, (str, int, float, bool)) else make_hashable(value)
+    return self.attrs.get(attr, {}).get(key, [])
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
return self.attrs.get(attr, {}).get(value, [])
def get_by_attr(self, attr: str, value):
key = value if isinstance(value, (str, int, float, bool)) else make_hashable(value)
return self.attrs.get(attr, {}).get(key, [])

def advanced_find(self, tag:'str', attrs:'dict[t.Literal["text"] | str, str | bool | int | float | tuple[str, str | bool | int | float] | list[str | bool | int | float | tuple[str, str | bool | int | float]]]' = {}):
def check_attr(e:'Symbol', k:'str', v:'str | bool | int | float | tuple[str, str | bool | int | float]'):
Comment on lines +157 to +158
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Mutable default for attrs parameter (B006)

Using {} as a default value causes state to leak between calls.

-def advanced_find(self, tag: str,
-                  attrs: dict[...]= {}):
+def advanced_find(self, tag: str,
+                  attrs: dict[...]|None = None):
     ...
-    if "text" in attrs:
+    attrs = attrs or {}
+    if "text" in attrs:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def advanced_find(self, tag:'str', attrs:'dict[t.Literal["text"] | str, str | bool | int | float | tuple[str, str | bool | int | float] | list[str | bool | int | float | tuple[str, str | bool | int | float]]]' = {}):
def check_attr(e:'Symbol', k:'str', v:'str | bool | int | float | tuple[str, str | bool | int | float]'):
def advanced_find(self, tag: str,
attrs: dict[t.Literal["text"] | str,
str | bool | int | float |
tuple[str, str | bool | int | float] |
list[str | bool | int | float |
tuple[str, str | bool | int | float]]]
| None = None):
attrs = attrs or {}
def check_attr(e: 'Symbol',
k: 'str',
v: 'str | bool | int | float |
tuple[str, str | bool | int | float]'):
...
🧰 Tools
🪛 Ruff (0.8.2)

157-157: Do not use mutable data structures for argument defaults

Replace with None; initialize within function

(B006)

prop = e.get_prop(k)
if isinstance(prop, list):
return v in prop

if isinstance(prop, dict):
return v in list(prop.items())

return prop == v

tags = self.find(tag)
if "text" in attrs:
text = attrs.pop("text")
tags = filter(lambda e: e.text == text, tags)

for k, v in attrs.items():
tags = filter(lambda e: check_attr(e, k, v) if not isinstance(v, list) else all([check_attr(e, k, i) for i in v]), tags)
return list(tags)

@property
def id(self):
Expand Down
4 changes: 2 additions & 2 deletions BetterMD/elements/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def to_html(self, inner, symbol, parent):


class Style(Symbol):
def __init__(self, styles:'dict[str, ATTR_TYPES]'=None, classes:'list[str]'=None, inner:'list[Symbol]'=None, *, style: t.Optional[StyleDict] = None, raw: str = "",**props):
def __init__(self, *, style: t.Optional[StyleDict] = None, raw: str = "",**props):
"""
Styles with intuitive nested structure

Expand All @@ -73,7 +73,7 @@ def __init__(self, styles:'dict[str, ATTR_TYPES]'=None, classes:'list[str]'=None
inner: Child symbols
**props: Additional properties
"""
super().__init__(styles, classes, inner, **props)
super().__init__(**props)
self.style: 'StyleDict' = style or {}
self.raw: 'str' = raw

Expand Down
38 changes: 22 additions & 16 deletions BetterMD/elements/symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

import itertools as it

T = t.TypeVar("T", bound=ATTR_TYPES)
T1 = t.TypeVar("T1", bound=t.Union[ATTR_TYPES, t.Any])


set_recursion_limit(10000)

Expand All @@ -18,7 +21,6 @@ class Symbol:
prop_list: 'list[str]' = []
md: 't.Union[str, CustomMarkdown]' = ""
rst: 't.Union[str, CustomRst]' = ""
nl:'bool' = False
type: 't.Literal["block", "void", "inline"]' = "inline"

collection = Collection()
Expand All @@ -32,27 +34,30 @@ def __init_subclass__(cls, **kwargs) -> None:
cls._cuuid = it.count()
super().__init_subclass__(**kwargs)

def __init__(self, styles:'dict[str,str]'=None, classes:'list[str]'=None, inner:'list[Symbol]'=None, **props:'ATTR_TYPES'):
def __init__(self, inner:'list[Symbol]'=None, **props:'ATTR_TYPES'):
cls = type(self)

self.parent:'Symbol' = None
self.prepared:'bool' = False
self.html_written_props = ""
self.document = InnerHTML(self)

if styles is None:
styles = {}
if classes is None:
classes = []
if inner is None:
inner = []

self.styles: 'dict[str, str]' = styles
self.classes: 'list[str]' = classes
self.children:'List[Symbol]' = List(inner) or List()
self.props: 'dict[str, ATTR_TYPES]' = props
self.nuuid = next(cls._cuuid)

@property
def styles(self):
return self.props.get("style", {})

@property
def classes(self):
return self.props.get("class", [])


@property
def uuid(self):
return f"{type(self).__name__}-{self.nuuid}"
Expand All @@ -64,7 +69,7 @@ def text(self) -> 'str':

return "".join([e.text for e in self.children])

def copy(self, styles:'dict[str,str]'=None, classes:'list[str]'=None, inner:'list[Symbol]'=None):
def copy(self, styles:'dict[str,str]'=None):
if inner is None:
inner = []
if styles is None:
Expand Down Expand Up @@ -212,10 +217,10 @@ def handle_element(element:'ELEMENT|TEXT'):
attributes = text["attributes"]

# Handle class attribute separately if it exists
classes = []
classes:'list[str]' = []
if "class" in attributes:
classes = attributes["class"].split() if isinstance(attributes["class"], str) else attributes["class"]
del attributes["class"]
attributes["class"] = classes

# Handle style attribute separately if it exists
styles = {}
Expand All @@ -225,19 +230,20 @@ def handle_element(element:'ELEMENT|TEXT'):
styles = dict(item.split(":") for item in style_str.split(";") if ":" in item)
elif isinstance(style_str, dict):
styles = style_str
del attributes["style"]
attributes["style"] = styles

inner=[handle_element(elm) for elm in text["children"]]

return cls(
styles=styles,
classes=classes,
inner=inner,
**attributes
)

def get_prop(self, prop, default=""):
return self.props.get(prop, default)
def get_prop(self, prop:'str', default: 'T1'=None) -> 'ATTR_TYPES| T1':
try:
return self.props.get(prop, default) if default is not None else self.props.get(prop)
except Exception as e:
raise e

def set_prop(self, prop, value):
self.props[prop] = value
Expand Down
Loading