HTML parser fix
- Sometimes a tag attribute value is already a unicode object, and sometimes it is a UTF-8 encoded byte string. This change makes the parser handle both.
This commit is contained in:
parent
32f1e0447a
commit
c335ef7d10
|
@ -82,7 +82,7 @@ class SafeHTMLParser(HTMLParser):
|
||||||
val == ""
|
val == ""
|
||||||
self.sanitised += " " + quote_plus(attr)
|
self.sanitised += " " + quote_plus(attr)
|
||||||
if not (val is None):
|
if not (val is None):
|
||||||
self.sanitised += "=\"" + unicode(val, 'utf-8', 'replace') + "\""
|
self.sanitised += "=\"" + val if isinstance(val, unicode) else unicode(val, 'utf-8', 'replace') + "\""
|
||||||
if inspect.stack()[1][3] == "handle_startendtag":
|
if inspect.stack()[1][3] == "handle_startendtag":
|
||||||
self.sanitised += "/"
|
self.sanitised += "/"
|
||||||
self.sanitised += ">"
|
self.sanitised += ">"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user