-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtoXComposeTools.py
More file actions
129 lines (83 loc) · 3.34 KB
/
toXComposeTools.py
File metadata and controls
129 lines (83 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import unicodedata as uc
from typing import Tuple, NewType
import requests
from bs4 import BeautifulSoup
from string import hexdigits
"""
Naming conventions:
Identifiers beginning with an uppercase character are type names.
Snake_case is for callables, camelCase is for plain ol' data.
uhex : a string formatted "U𝑥",
where 𝑥 is a string of hexdigits. Eg. '→' has uhex 'U2192'
xc : a string formatted '<𝐴> [<𝐵> ⋯ ] : "𝐶" U𝑥 # 𝑁'
where 𝐴 and 𝐵 are keys, 𝐶 is the unicode char, U𝑥 is 𝐶's uhex and 𝑁 is
𝐶's unicode name.
…X… : (Uppercase 'X') conjunction of types in type names. Read as “AND.”
…2… : conversion from LHS to RHS, used in place of underscore in routine
name. Read as “TO.”
"""
# Triple of (character, uhex string, Unicode name) returned by the lookup
# routines in this module and consumed by CharXuhexXname2xc.
CharXuhexXname = NewType('CharXuhexXname', Tuple[str, str, str])
def char2uhex(char: str) -> str:
    """Return the uhex form of *char*: 'U' followed by its code point.

    The code point is written in uppercase hexadecimal, zero-padded to at
    least four digits, e.g. '→' becomes 'U2192'.
    """
    assert len(char) == 1, 'Must look up single characters.'
    codePoint = ord(char)
    return 'U{:04X}'.format(codePoint)
def uhex2int(hx: str) -> int:
    """Convert a uhex string to the integer code point it denotes.

    Every character that is not a hexadecimal digit (such as the leading
    'U') is discarded; what remains is parsed as a base-16 number.
    """
    digits = [c for c in hx if c in hexdigits]
    return int(''.join(digits), 16)
def lookup_char(char: str) -> CharXuhexXname:
    """Look up *char* locally and return its (char, uhex, name) triple.

    When unicodedata has no name for the character, the name slot holds
    the placeholder "ERR: Name not found".
    """
    # With a default supplied, uc.name returns it for unnamed characters
    # instead of raising ValueError.
    label = uc.name(char, "ERR: Name not found")
    return (char, char2uhex(char), label)
def lookup_uhex(uhx: str) -> CharXuhexXname:
    """Resolve the uhex string *uhx* locally to a (char, uhex, name) triple.

    The uhex slot of the result is regenerated from the decoded character,
    so it is normalised even if *uhx* was not.
    """
    codePoint = uhex2int(uhx)
    # Once decoded, the lookup is the same as for a literal character.
    return lookup_char(chr(codePoint))
def lookup_name(name: str) -> CharXuhexXname:
    """Find the character called *name* and return its triple.

    The name slot of the result is *name* exactly as it was passed in.
    uc.lookup raises KeyError when no character has that name.
    """
    found = uc.lookup(name)
    uhx = char2uhex(found)
    return (found, uhx, name)
def CharXuhexXname2xc(cun: CharXuhexXname, keyStr: str = None) -> str:
    """Format a (char, uhex, name) triple as one XCompose line.

    The result has the shape

        keyStr : "char" UHEX # name

    where the uhex part is upper-cased. A missing or empty *keyStr* leaves
    the key sequence blank.

    The character sits inside a double-quoted Compose string, so a double
    quote or a backslash is preceded by a backslash; written raw, either
    one would end or corrupt the quoted string.
    """
    (char, uhx, name) = cun
    keys = keyStr if keyStr else ""
    # Escape the backslash first so the one added for the quote survives.
    escaped = char.replace('\\', '\\\\').replace('"', '\\"')
    return ''.join((keys, ' : "', escaped, '" ', uhx.upper(), ' # ', name))
def char2xc(char: str, keyStr=None) -> str:
    """Build the XCompose line for *char* using the local Unicode database."""
    triple = lookup_char(char)
    return CharXuhexXname2xc(triple, keyStr)
def uhex2xc(uhx: str, keyStr=None) -> str:
    """Build the XCompose line for the code point given as uhex *uhx*."""
    triple = lookup_uhex(uhx)
    return CharXuhexXname2xc(triple, keyStr)
def name2xc(name: str, keyStr=None) -> str:
    """Build the XCompose line for the character whose Unicode name is *name*."""
    triple = lookup_name(name)
    return CharXuhexXname2xc(triple, keyStr)
# Query endpoint of the web lookup service; the input value and a mode
# parameter are appended to it by the *_lookup_url helpers.
baseURL = 'http://www.ltg.ed.ac.uk/~richard/utf-8.cgi?input='


def char_lookup_url(char: str) -> str:
    """Return the web lookup URL for the literal character *char*.

    The character is percent-encoded (as UTF-8) before it is placed in the
    query string. Left raw, characters such as '&', '#', '+' or '%' would
    be read as URL syntax and truncate or corrupt the query.
    """
    from urllib.parse import quote

    # safe='' so that '/' is encoded as well; nothing in a single looked-up
    # character should be treated as URL structure.
    return baseURL + quote(char, safe='') + '&mode=char'
def uhex_lookup_url(uhex: str) -> str:
    """Return the web lookup URL for the code point named by *uhex*.

    Only the hexadecimal digits of *uhex* are placed in the URL; the 'U'
    prefix and any other non-hex characters are dropped.
    """
    digits = ''.join(filter(lambda c: c in hexdigits, uhex))
    return '{}{}&mode=hex'.format(baseURL, digits)
def scrape_url(URL: str) -> CharXuhexXname:
    """Fetch a lookup result page and extract its (char, uhex, name) triple.

    Every table row with at least two <td> cells is read as a label/value
    pair. The rows labelled 'Character', 'Hex code point' and
    'Character name' supply the result; a missing or empty name becomes
    "ERR: Name not found".

    Raises:
        requests.RequestException: on network failure, timeout, or an
            HTTP error status.
        KeyError: if the page has no 'Character' or 'Hex code point' row.
    """
    # Without a timeout an unresponsive server would block the caller
    # indefinitely.
    response = requests.get(URL, timeout=10)
    # Fail here on an HTTP error rather than later while parsing an
    # error page.
    response.raise_for_status()

    infoTRs = BeautifulSoup(response.text, 'html.parser').find_all('tr')
    charData = {}
    for row in infoTRs:
        cells = row.find_all('td')
        # Rows without a label cell and a value cell carry no data;
        # indexing them would raise IndexError.
        if len(cells) < 2:
            continue
        charData[cells[0].find(string=True)] = cells[1].find(string=True)

    charName = charData.get('Character name')
    if charName is None:
        charName = "ERR: Name not found"
    return (charData['Character'],
            "U" + charData['Hex code point'],
            charName)
def lookup_char_web(char: str) -> CharXuhexXname:
    """Look up the literal character *char* through the web service."""
    url = char_lookup_url(char)
    return scrape_url(url)
def lookup_uhex_web(uhex: str) -> CharXuhexXname:
    """Look up the code point given as *uhex* through the web service."""
    url = uhex_lookup_url(uhex)
    return scrape_url(url)
def char2xc_web(char: str, keyStr=None) -> str:
    """Build the XCompose line for *char* from the web service's data."""
    triple = lookup_char_web(char)
    return CharXuhexXname2xc(triple, keyStr)
def uhex2xc_web(uhex: str, keyStr=None) -> str:
    """Build the XCompose line for code point *uhex* from the web service's data."""
    triple = lookup_uhex_web(uhex)
    return CharXuhexXname2xc(triple, keyStr)