Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 145 additions & 75 deletions pytermgui/markup/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
from __future__ import annotations

import json
from typing import Callable, Iterator, Protocol, TypedDict
from typing import Callable, Iterator, Protocol, TypedDict, List
from warnings import filterwarnings, warn

from ..colors import Color
from ..exceptions import ColorSyntaxError, MarkupSyntaxError
from ..regex import RE_ANSI_NEW as RE_ANSI
from ..regex import RE_MACRO, RE_MARKUP, RE_POSITION
from ..regex import RE_MACRO, RE_MARKUP
from .style_maps import CLEARERS, REVERSE_CLEARERS, REVERSE_STYLES, STYLES
from .tokens import (
AliasToken,
Expand Down Expand Up @@ -189,120 +188,191 @@ def tokenize_markup(text: str) -> Iterator[Token]:
yield PlainToken(text[cursor:length])


def tokenize_ansi( # pylint: disable=too-many-locals, too-many-branches, too-many-statements
text: str,
def _tokenize_ansi_color(
params: List[str],
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

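You can use `list[str]` here instead of `typing.List[str]`: the module already has `from __future__ import annotations`, so the annotation is not evaluated at runtime and stays valid on older Python versions.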

) -> Iterator[Token]:
"""Converts some ANSI-coded text into a stream of tokens.
"""Convert ANSI color code into a stream of tokens

Args:
text: Any valid ANSI-coded text.
params: List of parameters given to an SGR

Yields:
The generated tokens, in the order they occur within the text.
The generated tokens
"""
state = None
color_code = ""
for part in params:
if state is None:
if part in REVERSE_STYLES:
yield StyleToken(REVERSE_STYLES[part])
continue

cursor = 0
if part in REVERSE_CLEARERS:
yield ClearToken(REVERSE_CLEARERS[part])
continue

for matchobj in RE_ANSI.finditer(text):
start, end = matchobj.span()
if part in ("38", "48"):
state = "COLOR"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd use an Enum for the state representations; it makes it harder for a typo to lead to undefined behaviour.

color_code += part + ";"
continue

csi = matchobj.groups()[0:2]
link_osc = matchobj.groups()[2:4]
# standard colors
try:
yield ColorToken(part, Color.parse(part, localize=False))
continue

if cursor < start:
yield PlainToken(text[cursor:start])
except ColorSyntaxError as exc:
raise ValueError(f"Could not parse color tag {part!r}.") from exc

if link_osc != (None, None):
cursor = end
uri, label = link_osc
if state != "COLOR":
continue

yield HLinkToken(uri)
yield PlainToken(label)
yield ClearToken("/~")
color_code += part + ";"

# Ignore incomplete RGB colors
if (
color_code.startswith(("38;2;", "48;2;"))
and len(color_code.split(";")) != 6
):
continue

full, content = csi
try:
code = color_code

cursor = end
if code.startswith(("38;2;", "48;2;", "38;5;", "48;5;")):
stripped = code[5:-1]

code = ""
if code.startswith("4"):
stripped = "@" + stripped

# Position
posmatch = RE_POSITION.match(full)
code = stripped

if posmatch is not None:
ypos, xpos = posmatch.groups()
if not ypos and not xpos:
raise ValueError(
f"Cannot parse cursor when no position is supplied. Match: {posmatch!r}"
)
yield ColorToken(code, Color.parse(code, localize=False))

yield CursorToken(content, int(ypos) or None, int(xpos) or None)
except ColorSyntaxError:
continue

parts = content.split(";")

state = None
color_code = ""
for part in parts:
if state is None:
if part in REVERSE_STYLES:
yield StyleToken(REVERSE_STYLES[part])
continue

if part in REVERSE_CLEARERS:
yield ClearToken(REVERSE_CLEARERS[part])
continue

if part in ("38", "48"):
state = "COLOR"
color_code += part + ";"
continue
ESC="\x1b"

# standard colors
try:
yield ColorToken(part, Color.parse(part, localize=False))
continue
CSI="["
SGR="m"
CURSOR="H"

except ColorSyntaxError as exc:
raise ValueError(f"Could not parse color tag {part!r}.") from exc
OSC="]"
HYPERLINK="8"

if state != "COLOR":
continue
ST="\\"

color_code += part + ";"
SEP=";"

# Ignore incomplete RGB colors
if (
color_code.startswith(("38;2;", "48;2;"))
and len(color_code.split(";")) != 6
):
def tokenize_ansi( # pylint: disable=too-many-locals, too-many-branches, too-many-statements
text: str,
) -> Iterator[Token]:
"""Converts some ANSI-coded text into a stream of tokens.

Args:
text: Any valid ANSI-coded text.

Yields:
The generated tokens, in the order they occur within the text.
"""

## State machine status


cstate=None
params=[]

accumulator = ""


escaping = False

for char in text:
if char == ESC:
if cstate is None and len(accumulator) > 0:
yield PlainToken(accumulator)
accumulator = ""
escaping = True
continue


if escaping:
if char == CSI:
cstate = CSI
escaping = False
continue

try:
code = color_code
if char == OSC:
cstate = OSC
escaping = False
continue

if code.startswith(("38;2;", "48;2;", "38;5;", "48;5;")):
stripped = code[5:-1]
if char == ST:
if cstate == HYPERLINK:
params.append(accumulator)

if code.startswith("4"):
stripped = "@" + stripped
if sum(len(param) for param in params)> 0:
yield HLinkToken(params[2])
else:
yield ClearToken("/~")

params = []
accumulator = ""

cstate = None
escaping = False
continue

code = stripped
else:
raise ValueError(f"Unknown escape character, got {repr(char)}")

yield ColorToken(code, Color.parse(code, localize=False))

except ColorSyntaxError:
if cstate == OSC:
if char == HYPERLINK:
cstate = HYPERLINK
continue

state = None
color_code = ""
if char == SEP and cstate in (
HYPERLINK, CSI
):
params.append(accumulator)
accumulator = ""
continue

if cstate == CSI:
if char == SGR:
params.append(accumulator)
accumulator = ""

for token in _tokenize_ansi_color(params):
yield token

params = []
cstate = None
continue
if char == CURSOR:
params.append(accumulator)
accumulator = ""

if len(params) != 2:
raise ValueError("Invalid number of params for cursor token."
f"Expected 2, got {repr(params)}")

content = "".join((pos + ";" for pos in params))[:-1]
yield CursorToken(content, int(params[0]) or None, int(params[1]) or None)
params = []
cstate = None
continue

remaining = text[cursor:]
if len(remaining) > 0:
yield PlainToken(remaining)
accumulator += char

if len(accumulator) > 0:
yield PlainToken(accumulator)

def eval_alias(text: str, context: ContextDict) -> str:
"""Evaluates a space-delimited string of alias tags into their underlying value.
Expand Down
2 changes: 0 additions & 2 deletions pytermgui/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
from typing import Match

RE_LINK = re.compile(r"(?:\x1b\]8;;([^\\]*)\x1b\\([^\\]*?)\x1b\]8;;\x1b\\)")
RE_ANSI_NEW = re.compile(rf"(\x1b\[(.*?)[mH])|{RE_LINK.pattern}|(\x1b_G(.*?)\x1b\\)")
RE_ANSI = re.compile(r"(?:\x1b\[(.*?)[mH])|(?:\x1b\](.*?)\x1b\\)|(?:\x1b_G(.*?)\x1b\\)")
RE_MACRO = re.compile(r"(![a-z0-9_\-]+)(?:\(([\w\/\.?\-=:]+)\))?")
RE_MARKUP = re.compile(r"((\\*)\[([^\[\]]*)\])")
RE_POSITION = re.compile(r"\x1b\[(\d*?)(?:;(\d*))?H")
RE_PIXEL_SIZE = re.compile(r"\x1b\[4;([\d]+);([\d]+)t")

RE_256 = re.compile(r"^([\d]{1,3})$")
Expand Down
6 changes: 6 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ def test_parse(self):
== "\x1b[38;5;141m\x1b[48;5;61m\x1b[1mHELLO\x1b[0m"
), repr(tim.parse("[141 @61 bold !upper]Hello"))

def test_mutiple_hypertext_closing_sequence(self):
for plain in tim.group_styles(
"\x1b]8;;path.py\x1b\\inner\x1b]8;;\x1b\\\x1b]8;;\x1b\\outer"
):
assert "\x1b]8;;\x1b\\" not in plain.plain, repr(plain)


class TestFunctionality:
def test_alias(self):
Expand Down