Improved URI detector
This commit is contained in:
parent
ed5c8a01ef
commit
2adafdaadc
|
@ -21,7 +21,7 @@ class SafeHTMLParser(HTMLParser):
|
||||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
||||||
replaces = [["&", "&"], ["\"", """], ["<", "<"], [">", ">"], ["\n", "<br/>"], ["\t", " "], [" ", " "], [" ", " "], ["<br/> ", "<br/> "]]
|
replaces = [["&", "&"], ["\"", """], ["<", "<"], [">", ">"], ["\n", "<br/>"], ["\t", " "], [" ", " "], [" ", " "], ["<br/> ", "<br/> "]]
|
||||||
src_schemes = [ "data" ]
|
src_schemes = [ "data" ]
|
||||||
uriregex1 = re.compile(r'(\b(?:https?|telnet|gopher|file|wais|ftp):[\w/#~:.?+=&%@!\-.:;?\\-]+?(?=[.:?\-]*(?:[^\w/#~:;.?+=&%@!\-.:?\-]|$)))')
|
uriregex1 = re.compile(r'(?i)\b((?:(https?|ftp):(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))')
|
||||||
uriregex2 = re.compile(r'<a href="([^"]+)&')
|
uriregex2 = re.compile(r'<a href="([^"]+)&')
|
||||||
emailregex = re.compile(r'\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b')
|
emailregex = re.compile(r'\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user