From 171bc83ec0bde275602d6545255d83400a36093d Mon Sep 17 00:00:00 2001 From: Peter Surda Date: Tue, 28 Feb 2017 22:47:56 +0100 Subject: [PATCH] HTML parser fix - URLs followed with space were broken --- src/bitmessageqt/safehtmlparser.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/bitmessageqt/safehtmlparser.py b/src/bitmessageqt/safehtmlparser.py index aa935785..a78991d3 100644 --- a/src/bitmessageqt/safehtmlparser.py +++ b/src/bitmessageqt/safehtmlparser.py @@ -19,15 +19,22 @@ class SafeHTMLParser(HTMLParser): 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video'] - replaces = [["&", "&"], ["\"", """], ["<", "<"], [">", ">"], ["\n", "
"], ["\t", "    "], [" ", "  "], [" ", "  "], ["
", "
 "]] + replaces_pre = [["&", "&"], ["\"", """], ["<", "<"], [">", ">"]] + replaces_post = [["\n", "
"], ["\t", "    "], [" ", "  "], [" ", "  "], ["
", "
 "]] src_schemes = [ "data" ] uriregex1 = re.compile(r'(?i)\b((?:(https?|ftp|bitcoin):(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))') uriregex2 = re.compile(r' 1 and text[0] == " ": text = " " + text[1:] @@ -95,12 +102,13 @@ class SafeHTMLParser(HTMLParser): except UnicodeDecodeError: data = unicode(data, 'utf-8', errors='replace') HTMLParser.feed(self, data) - tmp = SafeHTMLParser.multi_replace(data) + tmp = SafeHTMLParser.replace_pre(data) tmp = SafeHTMLParser.uriregex1.sub( r'\1', tmp) tmp = SafeHTMLParser.uriregex2.sub(r'\1', tmp) + tmp = SafeHTMLParser.replace_post(tmp) self.raw += tmp def is_html(self, text = None, allow_picture = False):