Long message parsing fix

- while 448ceaa74c fixed slow rendering on
  Windows, there was still a bug where overly long messages caused
  freezing of the hyperlink regexp parser, which appears to happen on
  all platforms. Maybe it's a freeze, maybe it just takes too long. This
  patch aborts the regexp parser after 1 second and simply displays the
  message without hyperlinks being clickable. This doesn't affect HTML
  mode because there the links are kept as they are.
- Fixes #900
This commit is contained in:
Peter Šurda 2016-10-21 15:54:02 +02:00
parent 2067040ff1
commit 8f194296e7
Signed by untrusted user: PeterSurda
GPG Key ID: 0C5F50C0B5F37D87
2 changed files with 33 additions and 6 deletions

View File

@ -3,6 +3,22 @@ import inspect
import re import re
from urllib import quote, quote_plus from urllib import quote, quote_plus
from urlparse import urlparse from urlparse import urlparse
import multiprocessing
import Queue
from debug import logger
def regexpSubprocess(queue):
    """Turn URIs and e-mail addresses in one message into HTML links.

    Intended as the target of a worker process: takes a single text item
    from ``queue``, applies the ``SafeHTMLParser`` URI and e-mail patterns
    to it and puts the converted text back on the same ``queue``.

    If the conversion raises, nothing is put back, so the caller finds the
    queue empty and can fall back to the unconverted text.

    :param queue: queue that carries the input text in and the result out
    """
    try:
        result = queue.get()
        result = SafeHTMLParser.uriregex1.sub(
            r'<a href="\1">\1</a>',
            result)
        result = SafeHTMLParser.uriregex2.sub(r'<a href="\1&', result)
        result = SafeHTMLParser.emailregex.sub(
            r'<a href="mailto:\1">\1</a>', result)
    except Exception:
        # Still best effort (no result is returned), but record the cause
        # instead of swallowing it silently. SystemExit and
        # KeyboardInterrupt are deliberately left to propagate so the
        # process can be shut down normally.
        logger.exception(
            "Regular expression parsing failed, not displaying links")
    else:
        queue.put(result)
class SafeHTMLParser(HTMLParser): class SafeHTMLParser(HTMLParser):
# from html5lib.sanitiser # from html5lib.sanitiser
@ -62,7 +78,7 @@ class SafeHTMLParser(HTMLParser):
val == "" val == ""
self.sanitised += " " + quote_plus(attr) self.sanitised += " " + quote_plus(attr)
if not (val is None): if not (val is None):
self.sanitised += "=\"" + val + "\"" self.sanitised += "=\"" + unicode(val, 'utf-8', 'replace') + "\""
if inspect.stack()[1][3] == "handle_startendtag": if inspect.stack()[1][3] == "handle_startendtag":
self.sanitised += "/" self.sanitised += "/"
self.sanitised += ">" self.sanitised += ">"
@ -92,11 +108,20 @@ class SafeHTMLParser(HTMLParser):
def feed(self, data): def feed(self, data):
HTMLParser.feed(self, data) HTMLParser.feed(self, data)
tmp = SafeHTMLParser.multi_replace(data) tmp = SafeHTMLParser.multi_replace(data)
tmp = SafeHTMLParser.uriregex1.sub( tmp = unicode(tmp, 'utf-8', 'replace')
r'<a href="\1">\1</a>',
unicode(tmp, 'utf-8', 'replace')) queue = multiprocessing.Queue()
tmp = SafeHTMLParser.uriregex2.sub(r'<a href="\1&', tmp) parser = multiprocessing.Process(target=regexpSubprocess, name="RegExParser", args=(queue,))
tmp = SafeHTMLParser.emailregex.sub(r'<a href="mailto:\1">\1</a>', tmp) parser.start()
queue.put(tmp)
parser.join(1)
if parser.is_alive():
parser.terminate()
parser.join()
try:
tmp = queue.get(False)
except (Queue.Empty, StopIteration) as e:
logger.error("Regular expression parsing timed out, not displaying links")
self.raw += tmp self.raw += tmp
def is_html(self, text = None, allow_picture = False): def is_html(self, text = None, allow_picture = False):

View File

@ -43,6 +43,8 @@ def convertStringToInt(s):
def signal_handler(signal, frame): def signal_handler(signal, frame):
logger.error("Got signal %i in %s/%s", signal, current_process().name, current_thread().name) logger.error("Got signal %i in %s/%s", signal, current_process().name, current_thread().name)
if current_process().name == "RegExParser":
sys.exit(0)
if current_process().name != "MainProcess": if current_process().name != "MainProcess":
raise StopIteration("Interrupted") raise StopIteration("Interrupted")
if current_thread().name != "MainThread": if current_thread().name != "MainThread":