HTML parser fix

- sometimes a tag attribute value is already a unicode object, and
  sometimes it is a UTF-8 encoded byte string. This change makes the
  parser handle both.
This commit is contained in:
Peter Šurda 2016-10-23 09:02:27 +02:00
parent 32f1e0447a
commit c335ef7d10
Signed by: PeterSurda
GPG Key ID: 0C5F50C0B5F37D87

View File

@@ -82,7 +82,7 @@ class SafeHTMLParser(HTMLParser):
val == "" val == ""
self.sanitised += " " + quote_plus(attr) self.sanitised += " " + quote_plus(attr)
if not (val is None): if not (val is None):
self.sanitised += "=\"" + unicode(val, 'utf-8', 'replace') + "\"" self.sanitised += "=\"" + val if isinstance(val, unicode) else unicode(val, 'utf-8', 'replace') + "\""
if inspect.stack()[1][3] == "handle_startendtag": if inspect.stack()[1][3] == "handle_startendtag":
self.sanitised += "/" self.sanitised += "/"
self.sanitised += ">" self.sanitised += ">"