126 lines
4.5 KiB
Python
126 lines
4.5 KiB
Python
|
# ext/extract.py
|
||
|
# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>
|
||
|
#
|
||
|
# This module is part of Mako and is released under
|
||
|
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
||
|
|
||
|
import re
|
||
|
|
||
|
from mako import compat
|
||
|
from mako import lexer
|
||
|
from mako import parsetree
|
||
|
|
||
|
|
||
|
class MessageExtractor(object):
    """Walk a parsed Mako template and pull out translatable messages.

    This class only implements the template traversal.  It reads
    ``self.config`` (keys ``"encoding"`` and ``"comment-tags"``) and calls
    ``self.process_python(code, lineno, translator_strings)``; neither is
    defined here, so both are presumably supplied by a subclass or by the
    code that instantiates it.
    """

    def process_file(self, fileobj):
        """Lex the template read from ``fileobj`` and yield its messages.

        :param fileobj: object whose ``read()`` returns the template source.
        :return: generator over everything :meth:`extract_nodes` yields for
         the children of the parsed template node.
        """
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
        """Yield the messages found in ``nodes`` and, recursively, in the
        child nodes of def, block and call tags.

        Comments that start with one of the whitespace-separated tags in
        ``self.config["comment-tags"]`` open a run of translator comments;
        the comments collected in that run are handed to
        ``self.process_python`` together with the Python code of the next
        code-bearing node.

        :param nodes: iterable of ``mako.parsetree`` nodes.
        :return: generator over whatever ``self.process_python`` yields.
        """
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        # Kept as a tuple so it can be passed straight to str.startswith().
        comment_tags = tuple(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                # A comment either continues a run that is already open or
                # opens one by starting with a tag.  The tags are tested in
                # a single startswith() call so that a comment matching
                # several tags is still recorded only once.
                if in_translator_comments or value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                continue

            # Pick the Python source carried by the node and, for tags that
            # have a body, the child nodes to descend into afterwards.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    # A closing control line carries no code to scan; it
                    # only ends the current translator comment run.
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            code = compat.byte_buffer(compat.b("\n") + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            # The collected comments are consumed only when the node really
            # produced a message; the comment run itself ends either way.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]
|