Commit cb540b6

Get rid of _customize_token()
The only thing this function was doing was replacing `href_token` instances with `MinimalHrefToken`. We did this at a time when we were using parts of the tokenization internals from lxml instead of fully forking it. We have long since fully forked it, however, and we should just be creating `MinimalHrefToken` where we want them in the first place instead of looping through and replacing other tokens with them.
1 parent: 2e4483d
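
For context on why the old approach was awkward: these diffing tokens are str subclasses (the tokenizer was forked from lxml.html.diff, whose token base class works the same way), so a token's text is fixed at construction, and converting an href_token into a MinimalHrefToken means building a new object and copying every attribute across. A minimal sketch of the base class, simplified from the forked design; the attribute defaults here are illustrative:

    class token(str):
        """Simplified sketch of the diffing token base class (after lxml.html.diff).

        The text is the str value itself, with tag and whitespace metadata
        attached as attributes. Changing a token's class therefore requires
        rebuilding it field by field -- which is all _customize_token() did.
        """
        def __new__(cls, text, comparator=None, pre_tags=None, post_tags=None,
                    trailing_whitespace=''):
            obj = str.__new__(cls, text)
            obj.comparator = comparator
            obj.pre_tags = pre_tags if pre_tags is not None else []
            obj.post_tags = post_tags if post_tags is not None else []
            obj.trailing_whitespace = trailing_whitespace
            return obj

Since the fork owns the tokenizer, fixup_chunks() can instead instantiate MinimalHrefToken directly at tokenization time, which is exactly what the first hunk below does.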

File tree

1 file changed: +9 -29 lines

web_monitoring_diff/html_render_diff.py (+9 -29)
@@ -831,7 +831,7 @@ def fixup_chunks(chunks, comparator):
 
         elif current_token == TokenType.href:
             href = chunk[1]
-            cur_word = href_token(href, comparator=comparator, pre_tags=tag_accum, trailing_whitespace=" ")
+            cur_word = MinimalHrefToken(href, comparator=comparator, pre_tags=tag_accum, trailing_whitespace=" ")
             tag_accum = []
             result.append(cur_word)
 
@@ -1112,20 +1112,19 @@ def _customize_tokens(tokens):
         # result.append(SpacerToken(SPACER_STRING))
         # result.append(SpacerToken(SPACER_STRING))
 
-        customized = _customize_token(token)
-        result.append(customized)
+        result.append(token)
 
-        if str(customized) == "Posts" and str(tokens[token_index - 1]) == 'Other' and str(tokens[token_index - 2]) == 'and': # and str(tokens[token_index - 3]) == 'posts':
+        if str(token) == "Posts" and str(tokens[token_index - 1]) == 'Other' and str(tokens[token_index - 2]) == 'and': # and str(tokens[token_index - 3]) == 'posts':
             logger.debug(f'SPECIAL TAG!\n pre: {token.pre_tags}\n token: "{token}"\n post: {token.post_tags}')
             next_token = tokens[token_index + 1]
             logger.debug(f'SPECIAL TAG!\n pre: {next_token.pre_tags}\n token: "{next_token}"\n post: {next_token.post_tags}')
-            for tag_index, tag in enumerate(customized.post_tags):
+            for tag_index, tag in enumerate(token.post_tags):
                 if tag.startswith('</ul>'):
                     new_token = SpacerToken(SPACER_STRING)
                     result.append(new_token)
-                    new_token = SpacerToken(SPACER_STRING, pre_tags=customized.post_tags[tag_index:])
+                    new_token = SpacerToken(SPACER_STRING, pre_tags=token.post_tags[tag_index:])
                     result.append(new_token)
-                    customized.post_tags = customized.post_tags[:tag_index]
+                    token.post_tags = token.post_tags[:tag_index]
 
         # if isinstance(customized, ImgTagToken):
         #     result.append(SpacerToken(SPACER_STRING))
@@ -1143,7 +1142,7 @@ def _customize_tokens(tokens):
         # # result.append(SpacerToken(SPACER_STRING, post_tags=customized.post_tags, trailing_whitespace=customized.trailing_whitespace))
         # customized.post_tags = []
         # # customized.trailing_whitespace = ''
-        for tag_index, tag in enumerate(customized.post_tags):
+        for tag_index, tag in enumerate(token.post_tags):
             split_here = False
             for name in SEPARATABLE_TAGS:
                 if tag.startswith(f'<{name}'):
@@ -1156,8 +1155,8 @@ def _customize_tokens(tokens):
                 # new_token = SpacerToken(SPACER_STRING, pre_tags=customized.post_tags[tag_index:])
                 # customized.post_tags = customized.post_tags[0:tag_index]
 
-                new_token = SpacerToken(SPACER_STRING, post_tags=customized.post_tags[tag_index:])
-                customized.post_tags = customized.post_tags[0:tag_index]
+                new_token = SpacerToken(SPACER_STRING, post_tags=token.post_tags[tag_index:])
+                token.post_tags = token.post_tags[0:tag_index]
 
                 # tokens.insert(token_index + 1, token)
                 # token = new_token
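
The hunks above only rename customized to token; the splitting logic itself is unchanged. As a toy walk-through of that slice (the post_tags values here are hypothetical, not from a real page):

    # Suppose a token's trailing tags contain a separable tag at index 1:
    token.post_tags = ['</a>', '<div>', '<p>']
    tag_index = 1

    new_token = SpacerToken(SPACER_STRING, post_tags=token.post_tags[tag_index:])
    token.post_tags = token.post_tags[0:tag_index]

    # token keeps ['</a>']; the spacer token carries ['<div>', '<p>'],
    # splitting the tag run in two at the structural boundary.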
@@ -1193,25 +1192,6 @@ def _has_heading_tags(tag_list):
     return True
 
 
-# Seemed so nice and clean! But should probably be merged into
-# `_customize_tokens()` now. Or otherwise it needs to be able to produce more
-# than one token to replace the given token in the stream.
-def _customize_token(token):
-    """
-    Replace existing diffing tokens with customized ones for better output.
-    """
-    if isinstance(token, href_token):
-        return MinimalHrefToken(
-            str(token),
-            comparator=token.comparator,
-            pre_tags=token.pre_tags,
-            post_tags=token.post_tags,
-            trailing_whitespace=token.trailing_whitespace)
-        # return token
-    else:
-        return token
-
-
 # TODO: merge and reconcile this with `merge_change_groups()`, which is 90%
 # the same thing; it outputs the change elements as nested lists of tokens.
 def merge_changes(change_chunks, doc, tag_type='ins'):

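Taken together, the commit reduces the main loop in _customize_tokens() from a transform-and-append to a plain append. Condensed from the hunks above (simplified; the surrounding spacer logic is omitted):

    # Before: every token passed through a substitution step.
    for token_index, token in enumerate(tokens):
        customized = _customize_token(token)  # href_token -> MinimalHrefToken
        result.append(customized)

    # After: tokens are already the right class when they arrive here,
    # because fixup_chunks() constructs MinimalHrefToken up front.
    for token_index, token in enumerate(tokens):
        result.append(token)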