diff --git a/.gitignore b/.gitignore index e2da8db..1b33ff0 100644 --- a/.gitignore +++ b/.gitignore @@ -97,4 +97,6 @@ ENV/ .ropeproject # mkdocs documentation -/site \ No newline at end of file +/site + +.idea diff --git a/README.md b/README.md index c95f37b..e57c4a0 100644 --- a/README.md +++ b/README.md @@ -38,4 +38,7 @@ $ python handler.py # Changelog -1.0 - Release \ No newline at end of file +``` +1.0 - Release +1.1 - Add keywords search +``` diff --git a/css/main.css b/css/main.css index dc2a427..6b2e3de 100755 --- a/css/main.css +++ b/css/main.css @@ -7,21 +7,21 @@ padding: 10px; } -#results { font-family: Verdana; font-size: 14px; } +#results { font-family: Verdana; font-size: 14px; } #results h1 { font-size: 150%; margin: 0; padding: 5px; -} -#results h2 { font-size: 125%; color: #666; } +} +#results h2 { font-size: 125%; color: #666; } #results .file { padding: 5px; } -#results .link { - padding: 3px; +#results .link { + padding: 3px; border: 1px solid #555; background: #eee none repeat scroll 0 0; -} +} #results .highlight { color: #000; background: yellow; } #results .result { @@ -35,7 +35,7 @@ background-color: #000 !important; border-color: #000 !important; } -.navbar-inverse .navbar-collapse, +.navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form { background-color: #000; } diff --git a/handler.py b/handler.py index dbc2d26..ef51d6b 100644 --- a/handler.py +++ b/handler.py @@ -1,6 +1,7 @@ from __future__ import print_function import tornado.ioloop, tornado.web, tornado.autoreload from tornado.escape import json_encode, json_decode +import json import safeurl, types, sys, re, mimetypes, glob, jsbeautifier, urlparse, pycurl import calendar, time, datetime @@ -15,7 +16,7 @@ #------------------------------------------------------------ class BaseHandler(tornado.web.RequestHandler): - + def get_current_user(self): return [] @@ -39,7 +40,7 @@ def get_current_user(self): class MainHandler(BaseHandler): def initialize(self): return - + def get(self): self.render( 'templates/index.html', @@ -52,13 +53,13 @@ def get(self): class ViewAboutHandler(BaseHandler): def initialize(self): return - + def get(self): self.render( 'templates/about.html', ) - + #------------------------------------------------------------ # /parse/ajax #------------------------------------------------------------ @@ -77,7 +78,7 @@ def find_str(self, s, char): return index index += 1 return -1 - + def findEntireLine(self, contents, str): lineNum = 0 for item in contents.split("\n"): @@ -85,13 +86,13 @@ def findEntireLine(self, contents, str): linkPos = self.find_str(item, str) return item,lineNum,linkPos lineNum = lineNum+1 - + def parseForLinks(self, contents): discoveredLinks = [] outputLinks = [] # ugh lol regex = r"[^/][`'\"]([\/][a-zA-Z0-9_.-]+)+(?!([gimuy]*[,;\s])|\/\2)" - links = re.finditer(regex, contents) + links = re.finditer(regex, contents) for link in links: linkStr = link.group(0) # discoveredLinks list to avoid dupes and complex dupe checks @@ -107,6 +108,33 @@ def parseForLinks(self, contents): }) return outputLinks + def parseForKeywords(self, contents, keywords=[]): + if len(keywords) == 0: + return [] + + discoveredLinks = [] + outputLinks = [] + # ugh yeah + + for keyword in keywords: + regex = r".*"+re.escape(keyword)+".*" + links = re.finditer(regex, contents) + for link in links: + linkStr = link.group(0) + # discoveredLinks list to avoid dupes and complex dupe checks + if linkStr not in discoveredLinks: + # get the entire line, line number, and link position + entireLine,lineNum,linkPos = self.findEntireLine(contents, linkStr) + discoveredLinks.append(linkStr) + # print(entireLine) + outputLinks.append({ + "line": entireLine, + "link": linkStr, + "lineNum": lineNum, + "linkPos": linkPos + }) + return outputLinks + def getFormattedTimestamp(self): d = datetime.datetime.now() formatted = "{}_{}_{}_{}-{}".format(d.month, d.day, d.year, d.hour, d.minute) @@ -115,25 +143,23 @@ def getFormattedTimestamp(self): def formatHTMLOutput(self, html): output = output + html return output - + def beautifyJS(self, content): return jsbeautifier.beautify(content) def isLongLine(self, line): - if len(line)>1000: - return True - return False - - def fileRoutine(self, url, content): + return len(line)>1000 + + def fileRoutine(self, url, content, keywords): html = "" - + # beautify the JS for cleaner parsing # note: this can be slow against large JS files and can lead to failure prettyContent = self.beautifyJS(content) - + # parse all the links out - parsedLinks = self.parseForLinks(prettyContent) - + parsedLinks = self.parseForLinks(prettyContent) + self.parseForKeywords(prettyContent, keywords) + # if we have results, start building HTML if parsedLinks: print("Discovered {} links in {}".format(len(parsedLinks), url)) @@ -141,7 +167,7 @@ def fileRoutine(self, url, content): # html = html+'