diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8170f77..e527153 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -29,8 +29,3 @@ jobs: - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 - - - name: Publish package distributions to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ \ No newline at end of file diff --git a/BetterMD/__init__.py b/BetterMD/__init__.py index a2009e9..28aefaa 100644 --- a/BetterMD/__init__.py +++ b/BetterMD/__init__.py @@ -1,4 +1,4 @@ -from .elements import * +from . import elements from .html import CustomHTML from .markdown import CustomMarkdown from .rst import CustomRst @@ -13,7 +13,7 @@ def read(self) -> str: ... class HTML: @staticmethod def from_string(html:'str'): - return Symbol.from_html(html) + return elements.Symbol.from_html(html) @staticmethod def from_file(file: 'Readable'): @@ -22,7 +22,7 @@ def from_file(file: 'Readable'): except Exception as e: raise IOError(f"Error reading HTML file: {e}") - return Symbol.from_html(text) + return elements.Symbol.from_html(text) @staticmethod def from_url(url:'str'): @@ -35,7 +35,7 @@ def from_url(url:'str'): except Exception as e: raise IOError(f"Error reading HTML from URL: {e}") - ret = Symbol.from_html(text) + ret = elements.Symbol.from_html(text) if len(ret) == 1: return ret[0] @@ -45,7 +45,7 @@ def from_url(url:'str'): class MD: @staticmethod def from_string(md:'str'): - return Symbol.from_md(md) + return elements.Symbol.from_md(md) @staticmethod def from_file(file: 'Readable'): @@ -54,7 +54,7 @@ def from_file(file: 'Readable'): except Exception as e: raise IOError(f"Error reading Markdown file: {e}") - return Symbol.from_md(text) + return elements.Symbol.from_md(text) @staticmethod def from_url(url): @@ -64,7 +64,7 @@ def from_url(url): except Exception as e: raise IOError(f"Error reading Markdown from URL: {e}") - 
return Symbol.from_md(text) + return elements.Symbol.from_md(text) -__all__ = ["HTML", "MD", "Symbol", "Collection", "HTMLParser", "MDParser", "CustomHTML", "CustomMarkdown", "CustomRst", "enable_debug_mode"] +__all__ = ["HTML", "MD", "elements", "Collection", "HTMLParser", "MDParser", "CustomHTML", "CustomMarkdown", "CustomRst", "enable_debug_mode"] diff --git a/BetterMD/elements/col.py b/BetterMD/elements/col.py index cc84f42..564f2c7 100644 --- a/BetterMD/elements/col.py +++ b/BetterMD/elements/col.py @@ -13,4 +13,4 @@ class Col(Symbol): md = "" html = "col" rst = "" - self_closing = True \ No newline at end of file + type = "void" \ No newline at end of file diff --git a/BetterMD/elements/document.py b/BetterMD/elements/document.py index 5813adc..4f59589 100644 --- a/BetterMD/elements/document.py +++ b/BetterMD/elements/document.py @@ -2,6 +2,7 @@ if t.TYPE_CHECKING: from .symbol import Symbol + from ..typing import ATTR_TYPES T1 = t.TypeVar("T1") T2 = t.TypeVar("T2") @@ -10,6 +11,40 @@ ARGS = t.ParamSpec("ARGS") +class HashableList(t.Generic[T1]): + def __init__(self, lst:'list[T1]'): + self.lst = lst + + def __hash__(self): + # Convert list to tuple for hashing + return hash(tuple(self.lst)) + + def __eq__(self, other): + if not isinstance(other, HashableList): + return False + return self.lst == other.lst + + def __repr__(self): + return f"HashableList({self.lst})" + + +class HashableDict(t.Generic[T1, T2]): + def __init__(self, dct:'dict[T1, T2]'): + self.dct = dct + + def __hash__(self): + # Hash the items as a frozenset so equal dicts hash equally regardless of key order + return hash(frozenset(self.dct.items())) + + def __eq__(self, other): + if not isinstance(other, HashableDict): + return False + return self.dct == other.dct + + def __repr__(self): + return f"HashableDict({self.dct})" + + class GetProtocol(t.Protocol, t.Generic[T1, T2]): def get(self, key: 'T1', ) -> 'T2': ... @@ -20,18 +55,21 @@ def copy(self) -> 'T1': ... 
class Copy: def __init__(self, data): self.data = data - + def copy(self): return self.data T5 = t.TypeVar("T5", bound=CopyProtocol) +HASHABLE_ATTRS = str | bool | int | float | HashableList['HASHABLE_ATTRS'] | HashableDict[str, 'HASHABLE_ATTRS'] class Fetcher(t.Generic[T1, T2, T5]): - def __init__(self, data: 'GetProtocol[T1, T2]', default:'T5'=Copy(None)): + def __init__(self, data: 't.Union[GetProtocol[T1, T2], dict[T1, T2]]', default:'T5'=Copy(None)): self.data = data self.default = default.copy() if isinstance(default, CopyProtocol) else default def __getitem__(self, name:'T1') -> 'T2|T5': + if isinstance(self.data, dict): + return self.data.get(name, self.default) return self.data.get(name, self.default) class InnerHTML: def __init__(self, inner): @@ -40,35 +78,54 @@ def __init__(self, inner): self.ids: 'dict[str|None, list[Symbol]]' = {} self.classes: 'dict[str, list[Symbol]]' = {} self.tags: 'dict[type[Symbol], list[Symbol]]' = {} + self.attrs: 'dict[str, dict[HASHABLE_ATTRS, list[Symbol]]]' = {} + self.text: 'dict[str, list[Symbol]]' = {} self.children_ids: 'dict[str|None, list[Symbol]]' = {} self.children_classes: 'dict[str, list[Symbol]]' = {} self.children_tags: 'dict[type[Symbol], list[Symbol]]' = {} + self.children_attrs: 'dict[str, dict[str, list[Symbol]]]' = {} + self.children_text: 'dict[str, list[Symbol]]' = {} - def add_elm(self, elm:'Symbol'): - """ - Add an element to the children indexes and merge the element's own indexes - recursively into aggregate indexes. + def add_elm(self, elm: 'Symbol'): + def make_hashable(v): + if isinstance(v, list): + return HashableList(v) + elif isinstance(v, dict): + return HashableDict(v) + return v - Args: - elm: Symbol element to add to the indexes. 
- """ self.children_ids.setdefault(elm.get_prop("id", None), []).append(elm) [self.children_classes.setdefault(c, []).append(elm) for c in elm.classes] self.children_tags.setdefault(type(elm), []).append(elm) - def concat(d1: 'dict[T1|T3, list[T2|T4]]', *d2: 'dict[T3, list[T4]]', **kwargs): - ret = {**kwargs} + # Normalize keys when adding to children_attrs + for prop, value in elm.props.items(): + key = make_hashable(value) + self.children_attrs.setdefault(prop, {}).setdefault(key, []).append(elm) - for dict in list(d2) + [d1]: - for k, v in dict.items(): + self.children_text.setdefault(elm.text, []).append(elm) + + def concat(d1: 'dict', *d2: 'dict'): + ret = {} + + for dict_ in list(d2) + [d1]: + for k, v in dict_.items(): ret.setdefault(k, []).extend(v) return ret + # Index each prop by its whole (hashable) value, mirroring children_attrs above + normalized_props = { + prop: {make_hashable(value): [elm]} + for prop, value in elm.props.items() + } + self.ids = concat(self.ids, elm.inner_html.ids, {elm.get_prop("id", None): [elm]}) self.classes = concat(self.classes, elm.inner_html.classes, {c: [elm] for c in elm.classes}) self.tags = concat(self.tags, elm.inner_html.tags, {type(elm): [elm]}) + self.attrs = {prop: concat(self.attrs.get(prop, {}), elm.inner_html.attrs.get(prop, {}), normalized_props.get(prop, {})) for prop in {**self.attrs, **elm.inner_html.attrs, **normalized_props}} + self.text = concat(self.text, elm.inner_html.text, {elm.text: [elm]}) def get_elements_by_id(self, id: 'str'): return self.ids.get(id, []) @@ -77,7 +134,45 @@ def get_elements_by_class_name(self, class_name: 'str'): return self.classes.get(class_name, []) def get_elements_by_tag_name(self, tag: 'str'): - return self.tags.get(tag, []) + # Find the tag class by name + for tag_class, elements in self.tags.items(): + if tag_class.__name__.lower() == tag.lower(): + return elements + return [] + + def find(self, key:'str'): + if key.startswith("#"): + return self.get_elements_by_id(key[1:]) + elif key.startswith("."): + return self.get_elements_by_class_name(key[1:]) + else: + return 
self.get_elements_by_tag_name(key) + + def get_by_text(self, text:'str'): + return self.text.get(text, []) + + def get_by_attr(self, attr:'str', value:'str'): + return self.attrs.get(attr, {}).get(value, []) + + def advanced_find(self, tag:'str', attrs:'dict[t.Literal["text"] | str, str | bool | int | float | tuple[str, str | bool | int | float] | list[str | bool | int | float | tuple[str, str | bool | int | float]]]' = {}): + def check_attr(e:'Symbol', k:'str', v:'str | bool | int | float | tuple[str, str | bool | int | float]'): + prop = e.get_prop(k) + if isinstance(prop, list): + return v in prop + + if isinstance(prop, dict): + return v in list(prop.items()) + + return prop == v + + tags = self.find(tag) + if "text" in attrs: + text, attrs = attrs["text"], {k: v for k, v in attrs.items() if k != "text"} + tags = filter(lambda e: e.text == text, tags) + + for k, v in attrs.items(): + tags = filter(lambda e, k=k, v=v: check_attr(e, k, v) if not isinstance(v, list) else all([check_attr(e, k, i) for i in v]), tags) + return list(tags) @property def id(self): diff --git a/BetterMD/elements/style.py b/BetterMD/elements/style.py index 3423155..0bf5e2b 100644 --- a/BetterMD/elements/style.py +++ b/BetterMD/elements/style.py @@ -63,7 +63,7 @@ def to_html(self, inner, symbol, parent): class Style(Symbol): - def __init__(self, styles:'dict[str, ATTR_TYPES]'=None, classes:'list[str]'=None, inner:'list[Symbol]'=None, *, style: t.Optional[StyleDict] = None, raw: str = "",**props): + def __init__(self, *, style: t.Optional[StyleDict] = None, raw: str = "",**props): """ Styles with intuitive nested structure @@ -73,7 +73,7 @@ def __init__(self, styles:'dict[str, ATTR_TYPES]'=None, classes:'list[str]'=None inner: Child symbols **props: Additional properties """ - super().__init__(styles, classes, inner, **props) + super().__init__(**props) self.style: 'StyleDict' = style or {} self.raw: 'str' = raw diff --git a/BetterMD/elements/symbol.py b/BetterMD/elements/symbol.py index b666bae..2f53240 100644 --- a/BetterMD/elements/symbol.py +++ 
b/BetterMD/elements/symbol.py @@ -10,6 +10,9 @@ import itertools as it +T = t.TypeVar("T", bound=ATTR_TYPES) +T1 = t.TypeVar("T1", bound=t.Union[ATTR_TYPES, t.Any]) + set_recursion_limit(10000) @@ -18,7 +21,6 @@ class Symbol: prop_list: 'list[str]' = [] md: 't.Union[str, CustomMarkdown]' = "" rst: 't.Union[str, CustomRst]' = "" - nl:'bool' = False type: 't.Literal["block", "void", "inline"]' = "inline" collection = Collection() @@ -32,7 +34,7 @@ def __init_subclass__(cls, **kwargs) -> None: cls._cuuid = it.count() super().__init_subclass__(**kwargs) - def __init__(self, styles:'dict[str,str]'=None, classes:'list[str]'=None, inner:'list[Symbol]'=None, **props:'ATTR_TYPES'): + def __init__(self, inner:'list[Symbol]'=None, **props:'ATTR_TYPES'): cls = type(self) self.parent:'Symbol' = None @@ -40,19 +42,22 @@ def __init__(self, styles:'dict[str,str]'=None, classes:'list[str]'=None, inner: self.html_written_props = "" self.document = InnerHTML(self) - if styles is None: - styles = {} - if classes is None: - classes = [] if inner is None: inner = [] - self.styles: 'dict[str, str]' = styles - self.classes: 'list[str]' = classes self.children:'List[Symbol]' = List(inner) or List() self.props: 'dict[str, ATTR_TYPES]' = props self.nuuid = next(cls._cuuid) + @property + def styles(self): + return self.props.get("style", {}) + + @property + def classes(self): + return self.props.get("class", []) + + @property def uuid(self): return f"{type(self).__name__}-{self.nuuid}" @@ -64,7 +69,7 @@ def text(self) -> 'str': return "".join([e.text for e in self.children]) - def copy(self, styles:'dict[str,str]'=None, classes:'list[str]'=None, inner:'list[Symbol]'=None): + def copy(self, styles:'dict[str,str]'=None): if inner is None: inner = [] if styles is None: @@ -212,10 +217,10 @@ def handle_element(element:'ELEMENT|TEXT'): attributes = text["attributes"] # Handle class attribute separately if it exists - classes = [] + classes:'list[str]' = [] if "class" in attributes: classes = 
attributes["class"].split() if isinstance(attributes["class"], str) else attributes["class"] - del attributes["class"] + attributes["class"] = classes # Handle style attribute separately if it exists styles = {} @@ -225,19 +230,20 @@ def handle_element(element:'ELEMENT|TEXT'): styles = dict(item.split(":") for item in style_str.split(";") if ":" in item) elif isinstance(style_str, dict): styles = style_str - del attributes["style"] + attributes["style"] = styles inner=[handle_element(elm) for elm in text["children"]] return cls( - styles=styles, - classes=classes, inner=inner, **attributes ) - def get_prop(self, prop, default=""): - return self.props.get(prop, default) + def get_prop(self, prop:'str', default: 'T1'=None) -> 'ATTR_TYPES| T1': + """ + Return the value stored for `prop`, or `default` when the property is unset. + """ + return self.props.get(prop, default) def set_prop(self, prop, value): self.props[prop] = value diff --git a/BetterMD/elements/table.py b/BetterMD/elements/table.py index 6dfbcf6..4deb859 100644 --- a/BetterMD/elements/table.py +++ b/BetterMD/elements/table.py @@ -188,19 +188,39 @@ class Table(Symbol): html = "table" md = TableMD() rst = TableRST() - def __init__(self, styles: 'dict[str, str]' = None, classes: 'list[str]' = None, inner: 'list[Symbol]' = None, **props: 'ATTR_TYPES'): + def __init__(self, inner: 'list[Symbol]' = None, **props: 'ATTR_TYPES'): self.head:'THead' = None self.body:'TBody' = None self.foot:'TFoot' = None self.widths:'list[int]' = [] - self.cols: 'defaultdict[Th|Td|HeadlessTd, list[Td | Th | HeadlessTd]]' = defaultdict(list) + self.cols: 'defaultdict[Th|Td|HeadlessTd, list[Td | Th]]' = defaultdict(list) self.headers: 'list[Th]' = [] - super().__init__(styles, classes, inner, **props) + super().__init__(inner, **props) + def prepare(self, parent: Symbol = None, dom: list[Symbol] = None, *args, **kwargs): return super().prepare(parent, dom, *args, **kwargs, table=self) + + def to_dict(self): + return { + k.data: 
[d.data for d in v] for k, v in self.cols.items() + } + + @classmethod + def from_dict(cls, data:'dict[str, list[str]]'): + self = cls() + head = THead.from_list(list(data.keys())) + body = TBody.from_list(list(data.values())) + + self.head = head + self.body = body + + self.add_child(head) + self.add_child(body) + + return self def to_pandas(self): if not self.prepared: @@ -234,28 +254,6 @@ def to_pandas(self): logger.error(f"Error converting table to pandas: {str(e)}") raise - def to_list(self): - if not self.prepared: - self.prepare() - ret = [] - - if self.head is not None: - ret.append(self.head.to_list()) - else: - ret.append([]) - - if self.body is not None: - ret.append(self.body.to_list()) - else: - ret.append([]) - - if self.foot is not None: - ret.append(self.foot.to_list()) - else: - ret.append([]) - - return ret - @classmethod def from_pandas(cls, df:'pd.DataFrame'): logger.debug(f"Creating Table from pandas DataFrame with shape {df.shape}") @@ -283,45 +281,82 @@ def from_pandas(cls, df:'pd.DataFrame'): logger.error(f"Error creating table from pandas: {str(e)}") raise + def to_list(self): + if not self.prepared: + self.prepare() + ret = [] + + if self.head is not None: + ret.append(self.head.to_list()) + else: + ret.append([]) + + if self.body is not None: + ret.append(self.body.to_list()) + else: + ret.append([]) + + if self.foot is not None: + ret.append(self.foot.to_list()) + else: + ret.append([]) + + return ret + + @classmethod + def from_list(cls, lst:'list[list[list[str] | str]]'): + logger.debug(f"Creating Table from list of lists with shape {len(lst)}") + self = cls() + head = THead.from_list(lst[0]) + body = TBody.from_list(lst[1]) + foot = TFoot.from_list(lst[2]) + self.head = head + self.body = body + self.foot = foot + self.extend_children([head, body, foot]) + return self + + class THead(Symbol): html = "thead" rst = THeadRST() md = THeadMD() - def __init__(self, styles: 'dict[str, str]' = None, classes: 'list[str]' = None, inner: 'list[Symbol]' = None, **props: 'ATTR_TYPES'): + def 
__init__(self, inner: 'list[Symbol]' = None, **props: 'ATTR_TYPES'): self.table:'Table' = None self.data:'list[Tr]' = [] - super().__init__(styles, classes, inner, **props) + super().__init__(inner, **props) + + def __len__(self): + return len(self.data) + + def __iter__(self): + return iter(self.data) def to_pandas(self) -> 'pd.Index': try: import pandas as pd - if len(self.data) == 0: - return pd.Index([]) - - elif len(self.data) == 1: - return pd.Index([d.data for d in self.data[0].data]) - return pd.MultiIndex.from_arrays([[d.data for d in row.data] for row in self.data]) + except ImportError: logger.error("pandas not installed - tables extra required") raise ImportError("`tables` extra is required to use `to_pandas`") - def to_list(self) -> 'list[list[str]]': - if not self.prepared: - self.prepare() - - return [row.to_list() for row in self.data] - @classmethod def from_pandas(cls, data:'pd.Index | pd.MultiIndex'): self = cls() self.add_child(Tr.from_pandas(data, head=True)) return self + def to_list(self) -> 'list[list[str]]': + if not self.prepared: + self.prepare() + + return [row.to_list() for row in self.data] + @classmethod - def from_list(cls, data:'list[str]|list[list[str]]'): + def from_list(cls, data:'list[list[str] | str]'): self = cls() if isinstance(data[0], list): self.extend_children([Tr.from_list(d, head=True) for d in data]) @@ -342,19 +377,17 @@ class TBody(Symbol): rst = TBodyRST() md = TBodyMD() - def __init__(self, styles: dict[str, str] = None, classes: list[str] = None, inner: list[Symbol] = None, **props: str | bool | int | float | list | dict): + def __init__(self, inner: list[Symbol] = None, **props: str | bool | int | float | list | dict): self.table:'Table' = None self.data :'list[Tr]' = [] - super().__init__(styles, classes, inner, **props) - + super().__init__(inner, **props) - def to_rst(self) -> 'str': - if isinstance(self.rst, CustomRst): - return self.rst.to_rst(self.children, self, self.parent) + def __len__(self): + return 
len(self.data) - inner_rst = " ".join([e.to_rst() for e in self.children]) - return f"{self.rst}{inner_rst}{self.rst}\n" + def __iter__(self): + return iter(self.data) def to_pandas(self) -> 'list[pd.Series]': if not self.prepared: @@ -381,6 +414,12 @@ def from_pandas(cls, df:'pd.DataFrame'): logger.error("pandas not installed - tables extra required") raise ImportError("`tables` extra is required to use `from_pandas`") + def to_list(self): + if not self.prepared: + self.prepare() + + return [row.to_list() for row in self.data] + @classmethod def from_list(cls, data:'list[list[str]]'): try: @@ -391,12 +430,9 @@ def from_list(cls, data:'list[list[str]]'): except Exception as e: logger.error(f"Exception occurred in `from_list`: {e}") + raise e - def to_list(self): - if not self.prepared: - self.prepare() - - return [row.to_list() for row in self.data] + return self def prepare(self, parent = None, dom=None, table=None, *args, **kwargs): assert isinstance(table, Table) @@ -411,18 +447,24 @@ class TFoot(Symbol): rst = TBodyRST() - def __init__(self, styles: dict[str, str] = None, classes: list[str] = None, inner: list[Symbol] = None, **props: str | bool | int | float | list | dict): + def __init__(self, inner: list[Symbol] = None, **props: str | bool | int | float | list | dict): self.table:'Table' = None self.data :'list[Tr]' = [] - super().__init__(styles, classes, inner, **props) + super().__init__(inner, **props) + + def __len__(self): + return len(self.data) + + def __iter__(self): + return iter(self.data) def to_pandas(self): if not self.prepared: self.prepare() logger.debug("Converting TFoot to pandas format") - data = [e.to_pandas() for e in self.children] + data = [e.to_pandas() for e in self.data] logger.debug(f"Converted {len(data)} rows from TFoot") return data @@ -447,6 +489,13 @@ def to_list(self): self.prepare() return [e.to_list() for e in self.data] + + @classmethod + def from_list(cls, data:'list[list[str]]'): + self = cls() + for row in data: + 
self.add_child(Tr.from_list(row)) + return self def prepare(self, parent = None, dom=None, table=None, *args, **kwargs): assert isinstance(table, Table) @@ -460,12 +509,18 @@ class Tr(Symbol): md = TrMD() rst = TrRST() - def __init__(self, styles: dict[str, str] = None, classes: list[str] = None, inner: list[Symbol] = None, **props: str | bool | int | float | list | dict): + def __init__(self, inner: list[Symbol] = None, **props: str | bool | int | float | list | dict): self.head:'THead|TBody|TFoot' = None self.table:'Table' = None self.data:'list[t.Union[Td, Th]]' = [] - super().__init__(styles, classes, inner, **props) + super().__init__(inner, **props) + + def __len__(self): + return len(self.data) + + def __iter__(self): + return iter(self.data) def to_pandas(self): if not self.prepared: @@ -535,8 +590,8 @@ def prepare(self, parent = None, dom=None, table=None, head:'THead|TBody|TFoot'= return ret class Data(Symbol): - def __init__(self, styles: dict[str, str] = None, classes: list[str] = None, inner: list[Symbol] = None, **props: 'ATTR_TYPES'): - super().__init__(styles, classes, inner, **props) + def __init__(self, inner: list[Symbol] = None, **props: 'ATTR_TYPES'): + super().__init__(inner, **props) self.row:'Tr' = None @property diff --git a/BetterMD/elements/track.py b/BetterMD/elements/track.py index 89661bb..c5a33af 100644 --- a/BetterMD/elements/track.py +++ b/BetterMD/elements/track.py @@ -4,6 +4,6 @@ class Track(Symbol): prop_list = ["default", "kind", "label", "src", "srclang"] html = "track" - md = "" + md = "" rst = "" - self_closing = True \ No newline at end of file + type = "void" \ No newline at end of file diff --git a/BetterMD/typing.py b/BetterMD/typing.py index 7a6f371..d705118 100644 --- a/BetterMD/typing.py +++ b/BetterMD/typing.py @@ -1,6 +1,6 @@ import typing as t -ATTR_TYPES = t.Union[str, bool, int, float, list, dict] +ATTR_TYPES = t.Union[str, bool, int, float, list['ATTR_TYPES'], dict[str, 'ATTR_TYPES']] ATTRS = t.Union[ 
t.TypedDict("ATTRS", { diff --git a/setup.py b/setup.py index ff97da9..93c6b35 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -VERSION = "0.3.2" +VERSION = "0.3.3" DESCRIPTION = "A better markdown library" def read(path):