HTML parser updates
The HTML parser wasn't handling img tags correctly. It now also disables external URL schemes in src attributes by default, to prevent deanonymisation (even though the renderer doesn't actually support external schemes at the moment). Addresses #178.
This commit is contained in:
parent
b202ac6fab
commit
f27ca0d3d6
|
@ -89,9 +89,9 @@ class MessageView(QtGui.QTextBrowser):
|
||||||
|
|
||||||
def setContent(self, data):
|
def setContent(self, data):
|
||||||
self.html = SafeHTMLParser()
|
self.html = SafeHTMLParser()
|
||||||
self.html.allow_picture = True
|
|
||||||
self.html.reset()
|
self.html.reset()
|
||||||
self.html.reset_safe()
|
self.html.reset_safe()
|
||||||
|
self.html.allow_picture = True
|
||||||
self.html.feed(data)
|
self.html.feed(data)
|
||||||
self.html.close()
|
self.html.close()
|
||||||
self.showPlain()
|
self.showPlain()
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
import inspect
|
import inspect
|
||||||
from urllib import quote, quote_plus
|
from urllib import quote, quote_plus
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
class SafeHTMLParser(HTMLParser):
|
class SafeHTMLParser(HTMLParser):
|
||||||
# from html5lib.sanitiser
|
# from html5lib.sanitiser
|
||||||
|
@ -18,6 +19,7 @@ class SafeHTMLParser(HTMLParser):
|
||||||
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
|
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
|
||||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
||||||
replaces = [["&", "&amp;"], ["\"", "&quot;"], ["<", "&lt;"], [">", "&gt;"], ["\n", "<br/>"], ["\t", "&nbsp;&nbsp;&nbsp;&nbsp;"], ["  ", "&nbsp; "], ["  ", "&nbsp; "], ["<br/> ", "<br/>&nbsp;"]]
|
replaces = [["&", "&amp;"], ["\"", "&quot;"], ["<", "&lt;"], [">", "&gt;"], ["\n", "<br/>"], ["\t", "&nbsp;&nbsp;&nbsp;&nbsp;"], ["  ", "&nbsp; "], ["  ", "&nbsp; "], ["<br/> ", "<br/>&nbsp;"]]
|
||||||
|
src_schemes = [ "data" ]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def multi_replace(text):
|
def multi_replace(text):
|
||||||
|
@ -36,27 +38,33 @@ class SafeHTMLParser(HTMLParser):
|
||||||
self.raw = u""
|
self.raw = u""
|
||||||
self.sanitised = u""
|
self.sanitised = u""
|
||||||
self.has_html = False
|
self.has_html = False
|
||||||
|
self.allow_picture = False
|
||||||
|
self.allow_external_src = False
|
||||||
|
|
||||||
def add_if_acceptable(self, tag, attrs = None):
|
def add_if_acceptable(self, tag, attrs = None):
|
||||||
if not tag in self.acceptable_elements:
|
if not tag in SafeHTMLParser.acceptable_elements:
|
||||||
return
|
return
|
||||||
self.sanitised += "<"
|
self.sanitised += "<"
|
||||||
if inspect.stack()[1][3] == "handle_endtag":
|
if inspect.stack()[1][3] == "handle_endtag":
|
||||||
self.sanitised += "/"
|
self.sanitised += "/"
|
||||||
self.sanitised += tag
|
self.sanitised += tag
|
||||||
if not attrs is None:
|
if not attrs is None:
|
||||||
for attr in attrs:
|
for attr, val in attrs:
|
||||||
if tag == "img" and attr[0] == "src" and not self.allow_picture:
|
if tag == "img" and attr == "src" and not self.allow_picture:
|
||||||
attr[1] = ""
|
val = ""
|
||||||
self.sanitised += " " + quote_plus(attr[0])
|
elif attr == "src" and not self.allow_external_src:
|
||||||
if not (attr[1] is None):
|
url = urlparse(val)
|
||||||
self.sanitised += "=\"" + attr[1] + "\""
|
if url.scheme not in SafeHTMLParser.src_schemes:
|
||||||
|
val = ""  # blank the src: scheme is not in src_schemes (must assign; "==" only compared and discarded)
|
||||||
|
self.sanitised += " " + quote_plus(attr)
|
||||||
|
if not (val is None):
|
||||||
|
self.sanitised += "=\"" + val + "\""
|
||||||
if inspect.stack()[1][3] == "handle_startendtag":
|
if inspect.stack()[1][3] == "handle_startendtag":
|
||||||
self.sanitised += "/"
|
self.sanitised += "/"
|
||||||
self.sanitised += ">"
|
self.sanitised += ">"
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if tag in self.acceptable_elements:
|
if tag in SafeHTMLParser.acceptable_elements:
|
||||||
self.has_html = True
|
self.has_html = True
|
||||||
self.add_if_acceptable(tag, attrs)
|
self.add_if_acceptable(tag, attrs)
|
||||||
|
|
||||||
|
@ -64,7 +72,7 @@ class SafeHTMLParser(HTMLParser):
|
||||||
self.add_if_acceptable(tag)
|
self.add_if_acceptable(tag)
|
||||||
|
|
||||||
def handle_startendtag(self, tag, attrs):
|
def handle_startendtag(self, tag, attrs):
|
||||||
if tag in self.acceptable_elements:
|
if tag in SafeHTMLParser.acceptable_elements:
|
||||||
self.has_html = True
|
self.has_html = True
|
||||||
self.add_if_acceptable(tag, attrs)
|
self.add_if_acceptable(tag, attrs)
|
||||||
|
|
||||||
|
@ -86,7 +94,7 @@ class SafeHTMLParser(HTMLParser):
|
||||||
if text:
|
if text:
|
||||||
self.reset()
|
self.reset()
|
||||||
self.reset_safe()
|
self.reset_safe()
|
||||||
|
self.allow_picture = allow_picture
|
||||||
self.feed(text)
|
self.feed(text)
|
||||||
self.close()
|
self.close()
|
||||||
self.allow_picture = allow_picture
|
return self.has_html
|
||||||
return self.has_html
|
|
||||||
|
|
Reference in New Issue
Block a user