HTML parser updates
The HTML parser wasn't handling img tags correctly. It now also disables external URL schemes by default to prevent deanonymisation (even though the renderer doesn't actually support external schemes at the moment). Addresses #178
This commit is contained in:
parent
b202ac6fab
commit
f27ca0d3d6
|
@ -89,9 +89,9 @@ class MessageView(QtGui.QTextBrowser):
|
|||
|
||||
def setContent(self, data):
|
||||
self.html = SafeHTMLParser()
|
||||
self.html.allow_picture = True
|
||||
self.html.reset()
|
||||
self.html.reset_safe()
|
||||
self.html.allow_picture = True
|
||||
self.html.feed(data)
|
||||
self.html.close()
|
||||
self.showPlain()
|
||||
self.showPlain()
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from HTMLParser import HTMLParser
|
||||
import inspect
|
||||
from urllib import quote, quote_plus
|
||||
from urlparse import urlparse
|
||||
|
||||
class SafeHTMLParser(HTMLParser):
|
||||
# from html5lib.sanitiser
|
||||
|
@ -18,6 +19,7 @@ class SafeHTMLParser(HTMLParser):
|
|||
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
|
||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
||||
replaces = [["&", "&"], ["\"", """], ["<", "<"], [">", ">"], ["\n", "<br/>"], ["\t", " "], [" ", " "], [" ", " "], ["<br/> ", "<br/> "]]
|
||||
src_schemes = [ "data" ]
|
||||
|
||||
@staticmethod
|
||||
def multi_replace(text):
|
||||
|
@ -36,27 +38,33 @@ class SafeHTMLParser(HTMLParser):
|
|||
self.raw = u""
|
||||
self.sanitised = u""
|
||||
self.has_html = False
|
||||
self.allow_picture = False
|
||||
self.allow_external_src = False
|
||||
|
||||
def add_if_acceptable(self, tag, attrs = None):
    """Append ``tag`` to ``self.sanitised`` if it is a whitelisted element.

    Tags that are not listed in ``SafeHTMLParser.acceptable_elements`` are
    dropped silently.  The form of the emitted tag depends on which parser
    callback invoked this method: ``handle_endtag`` yields ``</tag>``,
    ``handle_startendtag`` yields ``<tag ... />`` and any other caller
    yields ``<tag ...>``.

    ``src`` attributes are neutralised (set to an empty string) when:

    * the tag is ``img`` and ``self.allow_picture`` is false, or
    * ``self.allow_external_src`` is false and the URL scheme is not one
      of ``SafeHTMLParser.src_schemes``.

    :param tag: element name as reported by ``HTMLParser``.
    :param attrs: iterable of ``(name, value)`` pairs, or ``None`` when the
        tag carries no attributes (e.g. for end tags).  ``value`` may be
        ``None`` for valueless attributes.
    """
    if tag not in SafeHTMLParser.acceptable_elements:
        return

    # The name of the calling handler decides between "<tag>", "</tag>" and
    # "<tag/>".  Look it up once; this must stay in this method so that
    # stack index 1 is still the direct caller.
    caller = inspect.stack()[1][3]

    self.sanitised += "<"
    if caller == "handle_endtag":
        self.sanitised += "/"
    self.sanitised += tag

    if attrs is not None:
        for attr, val in attrs:
            if tag == "img" and attr == "src" and not self.allow_picture:
                val = ""
            elif attr == "src" and not self.allow_external_src:
                # A valueless src (val is None) has no URL to inspect.
                if val is not None and \
                        urlparse(val).scheme not in SafeHTMLParser.src_schemes:
                    # Assignment, not comparison: blank out the source so
                    # nothing outside the whitelisted schemes is referenced.
                    val = ""
            self.sanitised += " " + quote_plus(attr)
            if val is not None:
                # Escape the double quote so the value cannot terminate the
                # attribute and smuggle in additional markup.
                self.sanitised += "=\"" + val.replace("\"", "&quot;") + "\""

    if caller == "handle_startendtag":
        self.sanitised += "/"
    self.sanitised += ">"
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag in self.acceptable_elements:
|
||||
if tag in SafeHTMLParser.acceptable_elements:
|
||||
self.has_html = True
|
||||
self.add_if_acceptable(tag, attrs)
|
||||
|
||||
|
@ -64,7 +72,7 @@ class SafeHTMLParser(HTMLParser):
|
|||
self.add_if_acceptable(tag)
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
if tag in self.acceptable_elements:
|
||||
if tag in SafeHTMLParser.acceptable_elements:
|
||||
self.has_html = True
|
||||
self.add_if_acceptable(tag, attrs)
|
||||
|
||||
|
@ -86,7 +94,7 @@ class SafeHTMLParser(HTMLParser):
|
|||
if text:
|
||||
self.reset()
|
||||
self.reset_safe()
|
||||
self.allow_picture = allow_picture
|
||||
self.feed(text)
|
||||
self.close()
|
||||
self.allow_picture = allow_picture
|
||||
return self.has_html
|
||||
return self.has_html
|
||||
|
|
Reference in New Issue
Block a user