Freezing message parser fix #2

- This has been tested on Windows as well, and has been cleaned up.
  There is now a permanent parser process (a multiprocessing.Process
  named "RegExParser", not a thread), and it is terminated and restarted
  when the parsing of a message takes more than 1 second
- Fixes #900
This commit is contained in:
Peter Šurda 2016-10-22 01:45:32 +02:00
parent 8f194296e7
commit 47e2df86b9
Signed by untrusted user: PeterSurda
GPG Key ID: 0C5F50C0B5F37D87
4 changed files with 49 additions and 27 deletions

View File

@ -1,5 +1,7 @@
from PyQt4 import QtCore, QtGui from PyQt4 import QtCore, QtGui
import multiprocessing
import Queue
from urlparse import urlparse from urlparse import urlparse
from safehtmlparser import * from safehtmlparser import *

View File

@ -1,24 +1,28 @@
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
import inspect import inspect
import multiprocessing
import re import re
import Queue
from urllib import quote, quote_plus from urllib import quote, quote_plus
from urlparse import urlparse from urlparse import urlparse
import multiprocessing
import Queue
from debug import logger from debug import logger
from shared import parserInputQueue, parserOutputQueue, parserProcess, parserLock
def regexpSubprocess(parserInputQueue, parserOutputQueue):
    """Worker loop run inside the long-lived "RegExParser" process.

    Repeatedly takes one text item from ``parserInputQueue``, wraps URIs and
    e-mail addresses in ``<a href=...>`` tags using the regular expressions
    defined on ``SafeHTMLParser``, and puts the result on
    ``parserOutputQueue``.

    The loop ends when ``None`` is read from the input queue, or when the
    process is interrupted (``SystemExit`` / ``KeyboardInterrupt``).

    If the substitution itself fails, the error is logged and the text is
    returned unmodified, so the caller gets an answer immediately instead of
    waiting for its timeout and having to restart this process.
    """
    # iter() with a sentinel already stops when get() returns None, so no
    # additional "is None" check is needed inside the loop.
    for data in iter(parserInputQueue.get, None):
        try:
            result = SafeHTMLParser.uriregex1.sub(
                r'<a href="\1">\1</a>',
                data)
            result = SafeHTMLParser.uriregex2.sub(r'<a href="\1&', result)
            result = SafeHTMLParser.emailregex.sub(
                r'<a href="mailto:\1">\1</a>', result)
        except (SystemExit, KeyboardInterrupt):
            # Asked to stop while parsing: leave the loop quietly.
            break
        except Exception:
            # Previously a bare "except:" ended the worker silently. Keep
            # the worker alive and hand back the text without links.
            logger.exception(
                "Regular expression parsing failed, not displaying links")
            result = data
        parserOutputQueue.put(result)
class SafeHTMLParser(HTMLParser): class SafeHTMLParser(HTMLParser):
# from html5lib.sanitiser # from html5lib.sanitiser
@ -106,22 +110,28 @@ class SafeHTMLParser(HTMLParser):
self.sanitised += "&" + name + ";" self.sanitised += "&" + name + ";"
def feed(self, data):
    """Feed ``data`` to the HTML parser and append a linkified copy to ``self.raw``.

    ``data`` is first passed to ``HTMLParser.feed``. A copy is then run
    through ``SafeHTMLParser.multi_replace``, decoded as UTF-8 and sent to
    the shared "RegExParser" worker process, which turns URIs and e-mail
    addresses into links. If no result arrives within 1 second the worker
    is terminated and replaced, an error is logged, and the text is
    appended to ``self.raw`` without links.
    """
    # Only parserProcess is rebound here; the queues and the lock are only
    # read, so they need no global declaration.
    global parserProcess

    def startParser():
        """Start a fresh RegExParser worker process and return it."""
        process = multiprocessing.Process(
            target=regexpSubprocess, name="RegExParser",
            args=(parserInputQueue, parserOutputQueue))
        process.start()
        return process

    HTMLParser.feed(self, data)
    tmp = SafeHTMLParser.multi_replace(data)
    tmp = unicode(tmp, 'utf-8', 'replace')

    # Hold the lock for the whole request/response exchange. "with" makes
    # sure it is released even if starting or terminating the worker
    # raises, and serialising the exchange keeps concurrent callers from
    # picking up each other's results.
    with parserLock:
        # Start the worker on first use, and replace one that has exited
        # instead of discovering that only after a full timeout.
        if parserProcess is None or not parserProcess.is_alive():
            parserProcess = startParser()

        # Best effort: anything already in the output queue belongs to an
        # earlier request that timed out; discard it so it is not returned
        # as the answer to this one.
        # NOTE(review): a late result can still arrive after this point;
        # fully reliable matching would need request ids -- confirm whether
        # that is worth adding.
        try:
            while True:
                parserOutputQueue.get(False)
        except Queue.Empty:
            pass

        parserInputQueue.put(tmp)
        try:
            tmp = parserOutputQueue.get(True, 1)
        except Queue.Empty:
            logger.error("Regular expression parsing timed out, not displaying links")
            # NOTE(review): the multiprocessing documentation warns that
            # terminating a process that is using a queue may corrupt the
            # queue -- confirm the shared queues survive this in practice.
            parserProcess.terminate()
            parserProcess = startParser()

    self.raw += tmp
def is_html(self, text = None, allow_picture = False): def is_html(self, text = None, allow_picture = False):

View File

@ -44,7 +44,9 @@ def convertStringToInt(s):
def signal_handler(signal, frame): def signal_handler(signal, frame):
logger.error("Got signal %i in %s/%s", signal, current_process().name, current_thread().name) logger.error("Got signal %i in %s/%s", signal, current_process().name, current_thread().name)
if current_process().name == "RegExParser": if current_process().name == "RegExParser":
sys.exit(0) # on Windows this isn't triggered, but it's fine, it has its own process termination thing
print "RegExParser interrupted"
raise SystemExit
if current_process().name != "MainProcess": if current_process().name != "MainProcess":
raise StopIteration("Interrupted") raise StopIteration("Interrupted")
if current_thread().name != "MainThread": if current_thread().name != "MainThread":

View File

@ -16,7 +16,7 @@ import os
import pickle import pickle
import Queue import Queue
import random import random
from multiprocessing import active_children from multiprocessing import active_children, Queue as mpQueue, Lock as mpLock
from signal import SIGTERM from signal import SIGTERM
import socket import socket
import sys import sys
@ -48,6 +48,10 @@ myAddressesByTag = {} # The key in this dictionary is the tag generated from the
broadcastSendersForWhichImWatching = {} broadcastSendersForWhichImWatching = {}
workerQueue = Queue.Queue() workerQueue = Queue.Queue()
UISignalQueue = Queue.Queue() UISignalQueue = Queue.Queue()
parserInputQueue = mpQueue()
parserOutputQueue = mpQueue()
parserProcess = None
parserLock = mpLock()
addressGeneratorQueue = Queue.Queue() addressGeneratorQueue = Queue.Queue()
knownNodesLock = threading.Lock() knownNodesLock = threading.Lock()
knownNodes = {} knownNodes = {}
@ -504,6 +508,10 @@ def isProofOfWorkSufficient(data,
def doCleanShutdown(): def doCleanShutdown():
global shutdown, thisapp global shutdown, thisapp
shutdown = 1 #Used to tell proof of work worker threads and the objectProcessorThread to exit. shutdown = 1 #Used to tell proof of work worker threads and the objectProcessorThread to exit.
try:
parserInputQueue.put(None, False)
except Queue.Full:
pass
for child in active_children(): for child in active_children():
try: try:
logger.info("Killing PoW child %i", child.pid) logger.info("Killing PoW child %i", child.pid)