-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcode_parsers.py
72 lines (52 loc) · 2.38 KB
/
code_parsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from bs4 import BeautifulSoup
from helpers import is_relative, iGEM_URL, resolve_relative_URL
from pathlib import Path
import os
import re
from jsmin import jsmin
# process HTML files
def HTMLparser(config, path, contents, upload_map):
    """Rewrite URL-valued attributes in an HTML document.

    The document is parsed with BeautifulSoup using the ``html5lib`` parser.
    For every (tag, attribute) pair in the table below, each matching tag
    that carries the attribute gets its value replaced by
    ``iGEM_URL(config, path, upload_map, value)``.

    Args:
        config: Passed through unchanged to ``iGEM_URL``.
        path: Passed through unchanged to ``iGEM_URL``
            (presumably the path of the file being processed -- confirm
            against ``helpers.iGEM_URL``).
        contents: HTML source text to process.
        upload_map: Passed through unchanged to ``iGEM_URL``.

    Returns:
        The modified document serialised back to a string via ``str(soup)``.
    """
    # Attribute list taken from:
    # https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
    url_attributes = (
        ('link', 'href'), ('script', 'src'), ('a', 'href'),
        ('applet', 'codebase'), ('area', 'href'), ('base', 'href'),
        ('blockquote', 'cite'), ('body', 'background'), ('del', 'cite'),
        ('form', 'action'), ('frame', 'longdesc'), ('frame', 'src'),
        ('head', 'profile'), ('iframe', 'longdesc'), ('iframe', 'src'),
        ('img', 'longdesc'), ('img', 'src'), ('img', 'usemap'),
        ('input', 'src'), ('input', 'usemap'), ('ins', 'cite'),
        ('object', 'classid'), ('object', 'codebase'), ('object', 'data'),
        ('object', 'usemap'), ('q', 'cite'), ('audio', 'src'),
        ('button', 'formaction'), ('command', 'icon'), ('embed', 'src'),
        ('html', 'manifest'), ('input', 'formaction'), ('source', 'src'),
        ('track', 'src'), ('video', 'poster'), ('video', 'src'),
    )

    soup = BeautifulSoup(contents, 'html5lib')

    # TODO: Replace URLs for AJAX loads as well
    for tag_name, attr in url_attributes:
        # ``attrs={attr: True}`` selects only tags on which the attribute is
        # present.  ``find_all`` is the supported name; ``findAll`` is a
        # deprecated BeautifulSoup 3 alias.
        for tag in soup.find_all(tag_name, attrs={attr: True}):
            # TODO: Add data-nosub
            tag[attr] = iGEM_URL(config, path, upload_map, tag[attr])

    # TODO: Add error handling
    return str(soup)
def CSSparser(config, path, contents, upload_map):
    """Rewrite ``url(...)`` references in a stylesheet.

    Every ``url(...)`` whose target starts with ``.`` or ``/`` has that
    target replaced by ``iGEM_URL(config, path, upload_map, target)``.
    Unquoted, single-quoted and double-quoted forms are all handled.  Any
    ``?query`` or ``#fragment`` suffix is not passed to ``iGEM_URL`` and is
    kept verbatim after the rewritten target, as are the surrounding quotes.

    Only the text inside the matched ``url(...)`` is touched, so a link that
    happens to be a substring of another link (or of unrelated CSS text)
    cannot corrupt it.

    Args:
        config: Passed through unchanged to ``iGEM_URL``.
        path: Passed through unchanged to ``iGEM_URL``.
        contents: CSS source text to process.
        upload_map: Passed through unchanged to ``iGEM_URL``.

    Returns:
        The stylesheet text with the matching links rewritten.
    """
    # quote: optional opening quote, which must be matched by the same
    #        closing quote (or by nothing when the URL is unquoted).
    # link:  target starting with '.' or '/', stopping before any quote,
    #        parenthesis or newline so a match never runs past its own
    #        url(...).
    url_ref = re.compile(
        r"""url\(\s*(?P<quote>['"]?)(?P<link>[./][^'"()\n]*?)(?P=quote)\s*\)"""
    )

    # Cache so iGEM_URL is called once per distinct link, in order of first
    # appearance.
    resolved = {}

    # TODO: Think of a way to replicate data-nosub for CSS
    def rewrite(match):
        link = match.group('link')
        # Drop the query string and fragment before resolving the link.
        bare = link.split('?')[0].split('#')[0]
        if bare not in resolved:
            resolved[bare] = iGEM_URL(config, path, upload_map, bare)
        # Splice the new target into the original match text so quotes,
        # whitespace and any ?query/#fragment suffix are preserved exactly.
        whole = match.group(0)
        offset = match.start('link') - match.start()
        return whole[:offset] + resolved[bare] + whole[offset + len(bare):]

    return url_ref.sub(rewrite, contents)
def JSparser(contents):
    """Return the JavaScript source ``contents`` after passing it to ``jsmin``."""
    # TODO: URL replacement in JS
    # look at Virginia's tasks/unit/html.js for this
    minified = jsmin(contents)
    return minified