from __future__ import absolute_import import re import sys import copy import codecs import itertools from . import TypeSlots from .ExprNodes import not_a_constant import cython cython.declare(UtilityCode=object, EncodedString=object, bytes_literal=object, encoded_string=object, Nodes=object, ExprNodes=object, PyrexTypes=object, Builtin=object, UtilNodes=object, _py_int_types=object) if sys.version_info[0] >= 3: _py_int_types = int _py_string_types = (bytes, str) else: _py_int_types = (int, long) _py_string_types = (bytes, unicode) from . import Nodes from . import ExprNodes from . import PyrexTypes from . import Visitor from . import Builtin from . import UtilNodes from . import Options from .Code import UtilityCode, TempitaUtilityCode from .StringEncoding import EncodedString, bytes_literal, encoded_string from .Errors import error, warning from .ParseTreeTransforms import SkipDeclarations try: from __builtin__ import reduce except ImportError: from functools import reduce try: from __builtin__ import basestring except ImportError: basestring = str # Python 3 def load_c_utility(name): return UtilityCode.load_cached(name, "Optimize.c") def unwrap_coerced_node(node, coercion_nodes=(ExprNodes.CoerceToPyTypeNode, ExprNodes.CoerceFromPyTypeNode)): if isinstance(node, coercion_nodes): return node.arg return node def unwrap_node(node): while isinstance(node, UtilNodes.ResultRefNode): node = node.expression return node def is_common_value(a, b): a = unwrap_node(a) b = unwrap_node(b) if isinstance(a, ExprNodes.NameNode) and isinstance(b, ExprNodes.NameNode): return a.name == b.name if isinstance(a, ExprNodes.AttributeNode) and isinstance(b, ExprNodes.AttributeNode): return not a.is_py_attr and is_common_value(a.obj, b.obj) and a.attribute == b.attribute return False def filter_none_node(node): if node is not None and node.constant_result is None: return None return node class _YieldNodeCollector(Visitor.TreeVisitor): """ YieldExprNode finder for generator expressions. """ def __init__(self): Visitor.TreeVisitor.__init__(self) self.yield_stat_nodes = {} self.yield_nodes = [] visit_Node = Visitor.TreeVisitor.visitchildren def visit_YieldExprNode(self, node): self.yield_nodes.append(node) self.visitchildren(node) def visit_ExprStatNode(self, node): self.visitchildren(node) if node.expr in self.yield_nodes: self.yield_stat_nodes[node.expr] = node # everything below these nodes is out of scope: def visit_GeneratorExpressionNode(self, node): pass def visit_LambdaNode(self, node): pass def visit_FuncDefNode(self, node): pass def _find_single_yield_expression(node): yield_statements = _find_yield_statements(node) if len(yield_statements) != 1: return None, None return yield_statements[0] def _find_yield_statements(node): collector = _YieldNodeCollector() collector.visitchildren(node) try: yield_statements = [ (yield_node.arg, collector.yield_stat_nodes[yield_node]) for yield_node in collector.yield_nodes ] except KeyError: # found YieldExprNode without ExprStatNode (i.e. a non-statement usage of 'yield') yield_statements = [] return yield_statements class IterationTransform(Visitor.EnvTransform): """Transform some common for-in loop patterns into efficient C loops: - for-in-dict loop becomes a while loop calling PyDict_Next() - for-in-enumerate is replaced by an external counter variable - for-in-range loop becomes a plain C for loop """ def visit_PrimaryCmpNode(self, node): if node.is_ptr_contains(): # for t in operand2: # if operand1 == t: # res = True # break # else: # res = False pos = node.pos result_ref = UtilNodes.ResultRefNode(node) if node.operand2.is_subscript: base_type = node.operand2.base.type.base_type else: base_type = node.operand2.type.base_type target_handle = UtilNodes.TempHandle(base_type) target = target_handle.ref(pos) cmp_node = ExprNodes.PrimaryCmpNode( pos, operator=u'==', operand1=node.operand1, operand2=target) if_body = Nodes.StatListNode( pos, stats = [Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=1)), Nodes.BreakStatNode(pos)]) if_node = Nodes.IfStatNode( pos, if_clauses=[Nodes.IfClauseNode(pos, condition=cmp_node, body=if_body)], else_clause=None) for_loop = UtilNodes.TempsBlockNode( pos, temps = [target_handle], body = Nodes.ForInStatNode( pos, target=target, iterator=ExprNodes.IteratorNode(node.operand2.pos, sequence=node.operand2), body=if_node, else_clause=Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=0)))) for_loop = for_loop.analyse_expressions(self.current_env()) for_loop = self.visit(for_loop) new_node = UtilNodes.TempResultFromStatNode(result_ref, for_loop) if node.operator == 'not_in': new_node = ExprNodes.NotNode(pos, operand=new_node) return new_node else: self.visitchildren(node) return node def visit_ForInStatNode(self, node): self.visitchildren(node) return self._optimise_for_loop(node, node.iterator.sequence) def _optimise_for_loop(self, node, iterable, reversed=False): annotation_type = None if (iterable.is_name or iterable.is_attribute) and iterable.entry and iterable.entry.annotation: annotation = iterable.entry.annotation if annotation.is_subscript: annotation = annotation.base # container base type # FIXME: generalise annotation evaluation => maybe provide a "qualified name" also for imported names? if annotation.is_name: if annotation.entry and annotation.entry.qualified_name == 'typing.Dict': annotation_type = Builtin.dict_type elif annotation.name == 'Dict': annotation_type = Builtin.dict_type if annotation.entry and annotation.entry.qualified_name in ('typing.Set', 'typing.FrozenSet'): annotation_type = Builtin.set_type elif annotation.name in ('Set', 'FrozenSet'): annotation_type = Builtin.set_type if Builtin.dict_type in (iterable.type, annotation_type): # like iterating over dict.keys() if reversed: # CPython raises an error here: not a sequence return node return self._transform_dict_iteration( node, dict_obj=iterable, method=None, keys=True, values=False) if (Builtin.set_type in (iterable.type, annotation_type) or Builtin.frozenset_type in (iterable.type, annotation_type)): if reversed: # CPython raises an error here: not a sequence return node return self._transform_set_iteration(node, iterable) # C array (slice) iteration? if iterable.type.is_ptr or iterable.type.is_array: return self._transform_carray_iteration(node, iterable, reversed=reversed) if iterable.type is Builtin.bytes_type: return self._transform_bytes_iteration(node, iterable, reversed=reversed) if iterable.type is Builtin.unicode_type: return self._transform_unicode_iteration(node, iterable, reversed=reversed) # the rest is based on function calls if not isinstance(iterable, ExprNodes.SimpleCallNode): return node if iterable.args is None: arg_count = iterable.arg_tuple and len(iterable.arg_tuple.args) or 0 else: arg_count = len(iterable.args) if arg_count and iterable.self is not None: arg_count -= 1 function = iterable.function # dict iteration? if function.is_attribute and not reversed and not arg_count: base_obj = iterable.self or function.obj method = function.attribute # in Py3, items() is equivalent to Py2's iteritems() is_safe_iter = self.global_scope().context.language_level >= 3 if not is_safe_iter and method in ('keys', 'values', 'items'): # try to reduce this to the corresponding .iter*() methods if isinstance(base_obj, ExprNodes.CallNode): inner_function = base_obj.function if (inner_function.is_name and inner_function.name == 'dict' and inner_function.entry and inner_function.entry.is_builtin): # e.g. dict(something).items() => safe to use .iter*() is_safe_iter = True keys = values = False if method == 'iterkeys' or (is_safe_iter and method == 'keys'): keys = True elif method == 'itervalues' or (is_safe_iter and method == 'values'): values = True elif method == 'iteritems' or (is_safe_iter and method == 'items'): keys = values = True if keys or values: return self._transform_dict_iteration( node, base_obj, method, keys, values) # enumerate/reversed ? if iterable.self is None and function.is_name and \ function.entry and function.entry.is_builtin: if function.name == 'enumerate': if reversed: # CPython raises an error here: not a sequence return node return self._transform_enumerate_iteration(node, iterable) elif function.name == 'reversed': if reversed: # CPython raises an error here: not a sequence return node return self._transform_reversed_iteration(node, iterable) # range() iteration? if Options.convert_range and 1 <= arg_count <= 3 and ( iterable.self is None and function.is_name and function.name in ('range', 'xrange') and function.entry and function.entry.is_builtin): if node.target.type.is_int or node.target.type.is_enum: return self._transform_range_iteration(node, iterable, reversed=reversed) if node.target.type.is_pyobject: # Assume that small integer ranges (C long >= 32bit) are best handled in C as well. for arg in (iterable.arg_tuple.args if iterable.args is None else iterable.args): if isinstance(arg, ExprNodes.IntNode): if arg.has_constant_result() and -2**30 <= arg.constant_result < 2**30: continue break else: return self._transform_range_iteration(node, iterable, reversed=reversed) return node def _transform_reversed_iteration(self, node, reversed_function): args = reversed_function.arg_tuple.args if len(args) == 0: error(reversed_function.pos, "reversed() requires an iterable argument") return node elif len(args) > 1: error(reversed_function.pos, "reversed() takes exactly 1 argument") return node arg = args[0] # reversed(list/tuple) ? if arg.type in (Builtin.tuple_type, Builtin.list_type): node.iterator.sequence = arg.as_none_safe_node("'NoneType' object is not iterable") node.iterator.reversed = True return node return self._optimise_for_loop(node, arg, reversed=True) PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType( PyrexTypes.c_char_ptr_type, [ PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None) ]) PyBytes_GET_SIZE_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None) ]) def _transform_bytes_iteration(self, node, slice_node, reversed=False): target_type = node.target.type if not target_type.is_int and target_type is not Builtin.bytes_type: # bytes iteration returns bytes objects in Py2, but # integers in Py3 return node unpack_temp_node = UtilNodes.LetRefNode( slice_node.as_none_safe_node("'NoneType' is not iterable")) slice_base_node = ExprNodes.PythonCapiCallNode( slice_node.pos, "PyBytes_AS_STRING", self.PyBytes_AS_STRING_func_type, args = [unpack_temp_node], is_temp = 0, ) len_node = ExprNodes.PythonCapiCallNode( slice_node.pos, "PyBytes_GET_SIZE", self.PyBytes_GET_SIZE_func_type, args = [unpack_temp_node], is_temp = 0, ) return UtilNodes.LetNode( unpack_temp_node, self._transform_carray_iteration( node, ExprNodes.SliceIndexNode( slice_node.pos, base = slice_base_node, start = None, step = None, stop = len_node, type = slice_base_node.type, is_temp = 1, ), reversed = reversed)) PyUnicode_READ_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ucs4_type, [ PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_type, None), PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_type, None), PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None) ]) init_unicode_iteration_func_type = PyrexTypes.CFuncType( PyrexTypes.c_int_type, [ PyrexTypes.CFuncTypeArg("s", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("length", PyrexTypes.c_py_ssize_t_ptr_type, None), PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_ptr_type, None), PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_ptr_type, None) ], exception_value = '-1') def _transform_unicode_iteration(self, node, slice_node, reversed=False): if slice_node.is_literal: # try to reduce to byte iteration for plain Latin-1 strings try: bytes_value = bytes_literal(slice_node.value.encode('latin1'), 'iso8859-1') except UnicodeEncodeError: pass else: bytes_slice = ExprNodes.SliceIndexNode( slice_node.pos, base=ExprNodes.BytesNode( slice_node.pos, value=bytes_value, constant_result=bytes_value, type=PyrexTypes.c_const_char_ptr_type).coerce_to( PyrexTypes.c_const_uchar_ptr_type, self.current_env()), start=None, stop=ExprNodes.IntNode( slice_node.pos, value=str(len(bytes_value)), constant_result=len(bytes_value), type=PyrexTypes.c_py_ssize_t_type), type=Builtin.unicode_type, # hint for Python conversion ) return self._transform_carray_iteration(node, bytes_slice, reversed) unpack_temp_node = UtilNodes.LetRefNode( slice_node.as_none_safe_node("'NoneType' is not iterable")) start_node = ExprNodes.IntNode( node.pos, value='0', constant_result=0, type=PyrexTypes.c_py_ssize_t_type) length_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type) end_node = length_temp.ref(node.pos) if reversed: relation1, relation2 = '>', '>=' start_node, end_node = end_node, start_node else: relation1, relation2 = '<=', '<' kind_temp = UtilNodes.TempHandle(PyrexTypes.c_int_type) data_temp = UtilNodes.TempHandle(PyrexTypes.c_void_ptr_type) counter_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type) target_value = ExprNodes.PythonCapiCallNode( slice_node.pos, "__Pyx_PyUnicode_READ", self.PyUnicode_READ_func_type, args = [kind_temp.ref(slice_node.pos), data_temp.ref(slice_node.pos), counter_temp.ref(node.target.pos)], is_temp = False, ) if target_value.type != node.target.type: target_value = target_value.coerce_to(node.target.type, self.current_env()) target_assign = Nodes.SingleAssignmentNode( pos = node.target.pos, lhs = node.target, rhs = target_value) body = Nodes.StatListNode( node.pos, stats = [target_assign, node.body]) loop_node = Nodes.ForFromStatNode( node.pos, bound1=start_node, relation1=relation1, target=counter_temp.ref(node.target.pos), relation2=relation2, bound2=end_node, step=None, body=body, else_clause=node.else_clause, from_range=True) setup_node = Nodes.ExprStatNode( node.pos, expr = ExprNodes.PythonCapiCallNode( slice_node.pos, "__Pyx_init_unicode_iteration", self.init_unicode_iteration_func_type, args = [unpack_temp_node, ExprNodes.AmpersandNode(slice_node.pos, operand=length_temp.ref(slice_node.pos), type=PyrexTypes.c_py_ssize_t_ptr_type), ExprNodes.AmpersandNode(slice_node.pos, operand=data_temp.ref(slice_node.pos), type=PyrexTypes.c_void_ptr_ptr_type), ExprNodes.AmpersandNode(slice_node.pos, operand=kind_temp.ref(slice_node.pos), type=PyrexTypes.c_int_ptr_type), ], is_temp = True, result_is_used = False, utility_code=UtilityCode.load_cached("unicode_iter", "Optimize.c"), )) return UtilNodes.LetNode( unpack_temp_node, UtilNodes.TempsBlockNode( node.pos, temps=[counter_temp, length_temp, data_temp, kind_temp], body=Nodes.StatListNode(node.pos, stats=[setup_node, loop_node]))) def _transform_carray_iteration(self, node, slice_node, reversed=False): neg_step = False if isinstance(slice_node, ExprNodes.SliceIndexNode): slice_base = slice_node.base start = filter_none_node(slice_node.start) stop = filter_none_node(slice_node.stop) step = None if not stop: if not slice_base.type.is_pyobject: error(slice_node.pos, "C array iteration requires known end index") return node elif slice_node.is_subscript: assert isinstance(slice_node.index, ExprNodes.SliceNode) slice_base = slice_node.base index = slice_node.index start = filter_none_node(index.start) stop = filter_none_node(index.stop) step = filter_none_node(index.step) if step: if not isinstance(step.constant_result, _py_int_types) \ or step.constant_result == 0 \ or step.constant_result > 0 and not stop \ or step.constant_result < 0 and not start: if not slice_base.type.is_pyobject: error(step.pos, "C array iteration requires known step size and end index") return node else: # step sign is handled internally by ForFromStatNode step_value = step.constant_result if reversed: step_value = -step_value neg_step = step_value < 0 step = ExprNodes.IntNode(step.pos, type=PyrexTypes.c_py_ssize_t_type, value=str(abs(step_value)), constant_result=abs(step_value)) elif slice_node.type.is_array: if slice_node.type.size is None: error(slice_node.pos, "C array iteration requires known end index") return node slice_base = slice_node start = None stop = ExprNodes.IntNode( slice_node.pos, value=str(slice_node.type.size), type=PyrexTypes.c_py_ssize_t_type, constant_result=slice_node.type.size) step = None else: if not slice_node.type.is_pyobject: error(slice_node.pos, "C array iteration requires known end index") return node if start: start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) if stop: stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) if stop is None: if neg_step: stop = ExprNodes.IntNode( slice_node.pos, value='-1', type=PyrexTypes.c_py_ssize_t_type, constant_result=-1) else: error(slice_node.pos, "C array iteration requires known step size and end index") return node if reversed: if not start: start = ExprNodes.IntNode(slice_node.pos, value="0", constant_result=0, type=PyrexTypes.c_py_ssize_t_type) # if step was provided, it was already negated above start, stop = stop, start ptr_type = slice_base.type if ptr_type.is_array: ptr_type = ptr_type.element_ptr_type() carray_ptr = slice_base.coerce_to_simple(self.current_env()) if start and start.constant_result != 0: start_ptr_node = ExprNodes.AddNode( start.pos, operand1=carray_ptr, operator='+', operand2=start, type=ptr_type) else: start_ptr_node = carray_ptr if stop and stop.constant_result != 0: stop_ptr_node = ExprNodes.AddNode( stop.pos, operand1=ExprNodes.CloneNode(carray_ptr), operator='+', operand2=stop, type=ptr_type ).coerce_to_simple(self.current_env()) else: stop_ptr_node = ExprNodes.CloneNode(carray_ptr) counter = UtilNodes.TempHandle(ptr_type) counter_temp = counter.ref(node.target.pos) if slice_base.type.is_string and node.target.type.is_pyobject: # special case: char* -> bytes/unicode if slice_node.type is Builtin.unicode_type: target_value = ExprNodes.CastNode( ExprNodes.DereferenceNode( node.target.pos, operand=counter_temp, type=ptr_type.base_type), PyrexTypes.c_py_ucs4_type).coerce_to( node.target.type, self.current_env()) else: # char* -> bytes coercion requires slicing, not indexing target_value = ExprNodes.SliceIndexNode( node.target.pos, start=ExprNodes.IntNode(node.target.pos, value='0', constant_result=0, type=PyrexTypes.c_int_type), stop=ExprNodes.IntNode(node.target.pos, value='1', constant_result=1, type=PyrexTypes.c_int_type), base=counter_temp, type=Builtin.bytes_type, is_temp=1) elif node.target.type.is_ptr and not node.target.type.assignable_from(ptr_type.base_type): # Allow iteration with pointer target to avoid copy. target_value = counter_temp else: # TODO: can this safely be replaced with DereferenceNode() as above? target_value = ExprNodes.IndexNode( node.target.pos, index=ExprNodes.IntNode(node.target.pos, value='0', constant_result=0, type=PyrexTypes.c_int_type), base=counter_temp, type=ptr_type.base_type) if target_value.type != node.target.type: target_value = target_value.coerce_to(node.target.type, self.current_env()) target_assign = Nodes.SingleAssignmentNode( pos = node.target.pos, lhs = node.target, rhs = target_value) body = Nodes.StatListNode( node.pos, stats = [target_assign, node.body]) relation1, relation2 = self._find_for_from_node_relations(neg_step, reversed) for_node = Nodes.ForFromStatNode( node.pos, bound1=start_ptr_node, relation1=relation1, target=counter_temp, relation2=relation2, bound2=stop_ptr_node, step=step, body=body, else_clause=node.else_clause, from_range=True) return UtilNodes.TempsBlockNode( node.pos, temps=[counter], body=for_node) def _transform_enumerate_iteration(self, node, enumerate_function): args = enumerate_function.arg_tuple.args if len(args) == 0: error(enumerate_function.pos, "enumerate() requires an iterable argument") return node elif len(args) > 2: error(enumerate_function.pos, "enumerate() takes at most 2 arguments") return node if not node.target.is_sequence_constructor: # leave this untouched for now return node targets = node.target.args if len(targets) != 2: # leave this untouched for now return node enumerate_target, iterable_target = targets counter_type = enumerate_target.type if not counter_type.is_pyobject and not counter_type.is_int: # nothing we can do here, I guess return node if len(args) == 2: start = unwrap_coerced_node(args[1]).coerce_to(counter_type, self.current_env()) else: start = ExprNodes.IntNode(enumerate_function.pos, value='0', type=counter_type, constant_result=0) temp = UtilNodes.LetRefNode(start) inc_expression = ExprNodes.AddNode( enumerate_function.pos, operand1 = temp, operand2 = ExprNodes.IntNode(node.pos, value='1', type=counter_type, constant_result=1), operator = '+', type = counter_type, #inplace = True, # not worth using in-place operation for Py ints is_temp = counter_type.is_pyobject ) loop_body = [ Nodes.SingleAssignmentNode( pos = enumerate_target.pos, lhs = enumerate_target, rhs = temp), Nodes.SingleAssignmentNode( pos = enumerate_target.pos, lhs = temp, rhs = inc_expression) ] if isinstance(node.body, Nodes.StatListNode): node.body.stats = loop_body + node.body.stats else: loop_body.append(node.body) node.body = Nodes.StatListNode( node.body.pos, stats = loop_body) node.target = iterable_target node.item = node.item.coerce_to(iterable_target.type, self.current_env()) node.iterator.sequence = args[0] # recurse into loop to check for further optimisations return UtilNodes.LetNode(temp, self._optimise_for_loop(node, node.iterator.sequence)) def _find_for_from_node_relations(self, neg_step_value, reversed): if reversed: if neg_step_value: return '<', '<=' else: return '>', '>=' else: if neg_step_value: return '>=', '>' else: return '<=', '<' def _transform_range_iteration(self, node, range_function, reversed=False): args = range_function.arg_tuple.args if len(args) < 3: step_pos = range_function.pos step_value = 1 step = ExprNodes.IntNode(step_pos, value='1', constant_result=1) else: step = args[2] step_pos = step.pos if not isinstance(step.constant_result, _py_int_types): # cannot determine step direction return node step_value = step.constant_result if step_value == 0: # will lead to an error elsewhere return node step = ExprNodes.IntNode(step_pos, value=str(step_value), constant_result=step_value) if len(args) == 1: bound1 = ExprNodes.IntNode(range_function.pos, value='0', constant_result=0) bound2 = args[0].coerce_to_integer(self.current_env()) else: bound1 = args[0].coerce_to_integer(self.current_env()) bound2 = args[1].coerce_to_integer(self.current_env()) relation1, relation2 = self._find_for_from_node_relations(step_value < 0, reversed) bound2_ref_node = None if reversed: bound1, bound2 = bound2, bound1 abs_step = abs(step_value) if abs_step != 1: if (isinstance(bound1.constant_result, _py_int_types) and isinstance(bound2.constant_result, _py_int_types)): # calculate final bounds now if step_value < 0: begin_value = bound2.constant_result end_value = bound1.constant_result bound1_value = begin_value - abs_step * ((begin_value - end_value - 1) // abs_step) - 1 else: begin_value = bound1.constant_result end_value = bound2.constant_result bound1_value = end_value + abs_step * ((begin_value - end_value - 1) // abs_step) + 1 bound1 = ExprNodes.IntNode( bound1.pos, value=str(bound1_value), constant_result=bound1_value, type=PyrexTypes.spanning_type(bound1.type, bound2.type)) else: # evaluate the same expression as above at runtime bound2_ref_node = UtilNodes.LetRefNode(bound2) bound1 = self._build_range_step_calculation( bound1, bound2_ref_node, step, step_value) if step_value < 0: step_value = -step_value step.value = str(step_value) step.constant_result = step_value step = step.coerce_to_integer(self.current_env()) if not bound2.is_literal: # stop bound must be immutable => keep it in a temp var bound2_is_temp = True bound2 = bound2_ref_node or UtilNodes.LetRefNode(bound2) else: bound2_is_temp = False for_node = Nodes.ForFromStatNode( node.pos, target=node.target, bound1=bound1, relation1=relation1, relation2=relation2, bound2=bound2, step=step, body=node.body, else_clause=node.else_clause, from_range=True) for_node.set_up_loop(self.current_env()) if bound2_is_temp: for_node = UtilNodes.LetNode(bound2, for_node) return for_node def _build_range_step_calculation(self, bound1, bound2_ref_node, step, step_value): abs_step = abs(step_value) spanning_type = PyrexTypes.spanning_type(bound1.type, bound2_ref_node.type) if step.type.is_int and abs_step < 0x7FFF: # Avoid loss of integer precision warnings. spanning_step_type = PyrexTypes.spanning_type(spanning_type, PyrexTypes.c_int_type) else: spanning_step_type = PyrexTypes.spanning_type(spanning_type, step.type) if step_value < 0: begin_value = bound2_ref_node end_value = bound1 final_op = '-' else: begin_value = bound1 end_value = bound2_ref_node final_op = '+' step_calculation_node = ExprNodes.binop_node( bound1.pos, operand1=ExprNodes.binop_node( bound1.pos, operand1=bound2_ref_node, operator=final_op, # +/- operand2=ExprNodes.MulNode( bound1.pos, operand1=ExprNodes.IntNode( bound1.pos, value=str(abs_step), constant_result=abs_step, type=spanning_step_type), operator='*', operand2=ExprNodes.DivNode( bound1.pos, operand1=ExprNodes.SubNode( bound1.pos, operand1=ExprNodes.SubNode( bound1.pos, operand1=begin_value, operator='-', operand2=end_value, type=spanning_type), operator='-', operand2=ExprNodes.IntNode( bound1.pos, value='1', constant_result=1), type=spanning_step_type), operator='//', operand2=ExprNodes.IntNode( bound1.pos, value=str(abs_step), constant_result=abs_step, type=spanning_step_type), type=spanning_step_type), type=spanning_step_type), type=spanning_step_type), operator=final_op, # +/- operand2=ExprNodes.IntNode( bound1.pos, value='1', constant_result=1), type=spanning_type) return step_calculation_node def _transform_dict_iteration(self, node, dict_obj, method, keys, values): temps = [] temp = UtilNodes.TempHandle(PyrexTypes.py_object_type) temps.append(temp) dict_temp = temp.ref(dict_obj.pos) temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type) temps.append(temp) pos_temp = temp.ref(node.pos) key_target = value_target = tuple_target = None if keys and values: if node.target.is_sequence_constructor: if len(node.target.args) == 2: key_target, value_target = node.target.args else: # unusual case that may or may not lead to an error return node else: tuple_target = node.target elif keys: key_target = node.target else: value_target = node.target if isinstance(node.body, Nodes.StatListNode): body = node.body else: body = Nodes.StatListNode(pos = node.body.pos, stats = [node.body]) # keep original length to guard against dict modification dict_len_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type) temps.append(dict_len_temp) dict_len_temp_addr = ExprNodes.AmpersandNode( node.pos, operand=dict_len_temp.ref(dict_obj.pos), type=PyrexTypes.c_ptr_type(dict_len_temp.type)) temp = UtilNodes.TempHandle(PyrexTypes.c_int_type) temps.append(temp) is_dict_temp = temp.ref(node.pos) is_dict_temp_addr = ExprNodes.AmpersandNode( node.pos, operand=is_dict_temp, type=PyrexTypes.c_ptr_type(temp.type)) iter_next_node = Nodes.DictIterationNextNode( dict_temp, dict_len_temp.ref(dict_obj.pos), pos_temp, key_target, value_target, tuple_target, is_dict_temp) iter_next_node = iter_next_node.analyse_expressions(self.current_env()) body.stats[0:0] = [iter_next_node] if method: method_node = ExprNodes.StringNode( dict_obj.pos, is_identifier=True, value=method) dict_obj = dict_obj.as_none_safe_node( "'NoneType' object has no attribute '%{0}s'".format('.30' if len(method) <= 30 else ''), error = "PyExc_AttributeError", format_args = [method]) else: method_node = ExprNodes.NullNode(dict_obj.pos) dict_obj = dict_obj.as_none_safe_node("'NoneType' object is not iterable") def flag_node(value): value = value and 1 or 0 return ExprNodes.IntNode(node.pos, value=str(value), constant_result=value) result_code = [ Nodes.SingleAssignmentNode( node.pos, lhs = pos_temp, rhs = ExprNodes.IntNode(node.pos, value='0', constant_result=0)), Nodes.SingleAssignmentNode( dict_obj.pos, lhs = dict_temp, rhs = ExprNodes.PythonCapiCallNode( dict_obj.pos, "__Pyx_dict_iterator", self.PyDict_Iterator_func_type, utility_code = UtilityCode.load_cached("dict_iter", "Optimize.c"), args = [dict_obj, flag_node(dict_obj.type is Builtin.dict_type), method_node, dict_len_temp_addr, is_dict_temp_addr, ], is_temp=True, )), Nodes.WhileStatNode( node.pos, condition = None, body = body, else_clause = node.else_clause ) ] return UtilNodes.TempsBlockNode( node.pos, temps=temps, body=Nodes.StatListNode( node.pos, stats = result_code )) PyDict_Iterator_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("is_dict", PyrexTypes.c_int_type, None), PyrexTypes.CFuncTypeArg("method_name", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("p_orig_length", PyrexTypes.c_py_ssize_t_ptr_type, None), PyrexTypes.CFuncTypeArg("p_is_dict", PyrexTypes.c_int_ptr_type, None), ]) PySet_Iterator_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("set", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("is_set", PyrexTypes.c_int_type, None), PyrexTypes.CFuncTypeArg("p_orig_length", PyrexTypes.c_py_ssize_t_ptr_type, None), PyrexTypes.CFuncTypeArg("p_is_set", PyrexTypes.c_int_ptr_type, None), ]) def _transform_set_iteration(self, node, set_obj): temps = [] temp = UtilNodes.TempHandle(PyrexTypes.py_object_type) temps.append(temp) set_temp = temp.ref(set_obj.pos) temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type) temps.append(temp) pos_temp = temp.ref(node.pos) if isinstance(node.body, Nodes.StatListNode): body = node.body else: body = Nodes.StatListNode(pos = node.body.pos, stats = [node.body]) # keep original length to guard against set modification set_len_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type) temps.append(set_len_temp) set_len_temp_addr = ExprNodes.AmpersandNode( node.pos, operand=set_len_temp.ref(set_obj.pos), type=PyrexTypes.c_ptr_type(set_len_temp.type)) temp = UtilNodes.TempHandle(PyrexTypes.c_int_type) temps.append(temp) is_set_temp = temp.ref(node.pos) is_set_temp_addr = ExprNodes.AmpersandNode( node.pos, operand=is_set_temp, type=PyrexTypes.c_ptr_type(temp.type)) value_target = node.target iter_next_node = Nodes.SetIterationNextNode( set_temp, set_len_temp.ref(set_obj.pos), pos_temp, value_target, is_set_temp) iter_next_node = iter_next_node.analyse_expressions(self.current_env()) body.stats[0:0] = [iter_next_node] def flag_node(value): value = value and 1 or 0 return ExprNodes.IntNode(node.pos, value=str(value), constant_result=value) result_code = [ Nodes.SingleAssignmentNode( node.pos, lhs=pos_temp, rhs=ExprNodes.IntNode(node.pos, value='0', constant_result=0)), Nodes.SingleAssignmentNode( set_obj.pos, lhs=set_temp, rhs=ExprNodes.PythonCapiCallNode( set_obj.pos, "__Pyx_set_iterator", self.PySet_Iterator_func_type, utility_code=UtilityCode.load_cached("set_iter", "Optimize.c"), args=[set_obj, flag_node(set_obj.type is Builtin.set_type), set_len_temp_addr, is_set_temp_addr, ], is_temp=True, )), Nodes.WhileStatNode( node.pos, condition=None, body=body, else_clause=node.else_clause, ) ] return UtilNodes.TempsBlockNode( node.pos, temps=temps, body=Nodes.StatListNode( node.pos, stats = result_code )) class SwitchTransform(Visitor.EnvTransform): """ This transformation tries to turn long if statements into C switch statements. The requirement is that every clause be an (or of) var == value, where the var is common among all clauses and both var and value are ints. """ NO_MATCH = (None, None, None) def extract_conditions(self, cond, allow_not_in): while True: if isinstance(cond, (ExprNodes.CoerceToTempNode, ExprNodes.CoerceToBooleanNode)): cond = cond.arg elif isinstance(cond, ExprNodes.BoolBinopResultNode): cond = cond.arg.arg elif isinstance(cond, UtilNodes.EvalWithTempExprNode): # this is what we get from the FlattenInListTransform cond = cond.subexpression elif isinstance(cond, ExprNodes.TypecastNode): cond = cond.operand else: break if isinstance(cond, ExprNodes.PrimaryCmpNode): if cond.cascade is not None: return self.NO_MATCH elif cond.is_c_string_contains() and \ isinstance(cond.operand2, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)): not_in = cond.operator == 'not_in' if not_in and not allow_not_in: return self.NO_MATCH if isinstance(cond.operand2, ExprNodes.UnicodeNode) and \ cond.operand2.contains_surrogates(): # dealing with surrogates leads to different # behaviour on wide and narrow Unicode # platforms => refuse to optimise this case return self.NO_MATCH return not_in, cond.operand1, self.extract_in_string_conditions(cond.operand2) elif not cond.is_python_comparison(): if cond.operator == '==': not_in = False elif allow_not_in and cond.operator == '!=': not_in = True else: return self.NO_MATCH # this looks somewhat silly, but it does the right # checks for NameNode and AttributeNode if is_common_value(cond.operand1, cond.operand1): if cond.operand2.is_literal: return not_in, cond.operand1, [cond.operand2] elif getattr(cond.operand2, 'entry', None) \ and cond.operand2.entry.is_const: return not_in, cond.operand1, [cond.operand2] if is_common_value(cond.operand2, cond.operand2): if cond.operand1.is_literal: return not_in, cond.operand2, [cond.operand1] elif getattr(cond.operand1, 'entry', None) \ and cond.operand1.entry.is_const: return not_in, cond.operand2, [cond.operand1] elif isinstance(cond, ExprNodes.BoolBinopNode): if cond.operator == 'or' or (allow_not_in and cond.operator == 'and'): allow_not_in = (cond.operator == 'and') not_in_1, t1, c1 = self.extract_conditions(cond.operand1, allow_not_in) not_in_2, t2, c2 = self.extract_conditions(cond.operand2, allow_not_in) if t1 is not None and not_in_1 == not_in_2 and is_common_value(t1, t2): if (not not_in_1) or allow_not_in: return not_in_1, t1, c1+c2 return self.NO_MATCH def extract_in_string_conditions(self, string_literal): if isinstance(string_literal, ExprNodes.UnicodeNode): charvals = list(map(ord, set(string_literal.value))) charvals.sort() return [ ExprNodes.IntNode(string_literal.pos, value=str(charval), constant_result=charval) for charval in charvals ] else: # this is a bit tricky as Py3's bytes type returns # integers on iteration, whereas Py2 returns 1-char byte # strings characters = string_literal.value characters = list(set([ characters[i:i+1] for i in range(len(characters)) ])) characters.sort() return [ ExprNodes.CharNode(string_literal.pos, value=charval, constant_result=charval) for charval in characters ] def extract_common_conditions(self, common_var, condition, allow_not_in): not_in, var, conditions = self.extract_conditions(condition, allow_not_in) if var is None: return self.NO_MATCH elif common_var is not None and not is_common_value(var, common_var): return self.NO_MATCH elif not (var.type.is_int or var.type.is_enum) or sum([not (cond.type.is_int or cond.type.is_enum) for cond in conditions]): return self.NO_MATCH return not_in, var, conditions def has_duplicate_values(self, condition_values): # duplicated values don't work in a switch statement seen = set() for value in condition_values: if value.has_constant_result(): if value.constant_result in seen: return True seen.add(value.constant_result) else: # this isn't completely safe as we don't know the # final C value, but this is about the best we can do try: if value.entry.cname in seen: return True except AttributeError: return True # play safe seen.add(value.entry.cname) return False def visit_IfStatNode(self, node): if not self.current_directives.get('optimize.use_switch'): self.visitchildren(node) return node common_var = None cases = [] for if_clause in node.if_clauses: _, common_var, conditions = self.extract_common_conditions( common_var, if_clause.condition, False) if common_var is None: self.visitchildren(node) return node cases.append(Nodes.SwitchCaseNode(pos=if_clause.pos, conditions=conditions, body=if_clause.body)) condition_values = [ cond for case in cases for cond in case.conditions] if len(condition_values) < 2: self.visitchildren(node) return node if self.has_duplicate_values(condition_values): self.visitchildren(node) return node # Recurse into body subtrees that we left untouched so far. self.visitchildren(node, 'else_clause') for case in cases: self.visitchildren(case, 'body') common_var = unwrap_node(common_var) switch_node = Nodes.SwitchStatNode(pos=node.pos, test=common_var, cases=cases, else_clause=node.else_clause) return switch_node def visit_CondExprNode(self, node): if not self.current_directives.get('optimize.use_switch'): self.visitchildren(node) return node not_in, common_var, conditions = self.extract_common_conditions( None, node.test, True) if common_var is None \ or len(conditions) < 2 \ or self.has_duplicate_values(conditions): self.visitchildren(node) return node return self.build_simple_switch_statement( node, common_var, conditions, not_in, node.true_val, node.false_val) def visit_BoolBinopNode(self, node): if not self.current_directives.get('optimize.use_switch'): self.visitchildren(node) return node not_in, common_var, conditions = self.extract_common_conditions( None, node, True) if common_var is None \ or len(conditions) < 2 \ or self.has_duplicate_values(conditions): self.visitchildren(node) node.wrap_operands(self.current_env()) # in case we changed the operands return node return self.build_simple_switch_statement( node, common_var, conditions, not_in, ExprNodes.BoolNode(node.pos, value=True, constant_result=True), ExprNodes.BoolNode(node.pos, value=False, constant_result=False)) def visit_PrimaryCmpNode(self, node): if not self.current_directives.get('optimize.use_switch'): self.visitchildren(node) return node not_in, common_var, conditions = self.extract_common_conditions( None, node, True) if common_var is None \ or len(conditions) < 2 \ or self.has_duplicate_values(conditions): self.visitchildren(node) return node return self.build_simple_switch_statement( node, common_var, conditions, not_in, ExprNodes.BoolNode(node.pos, value=True, constant_result=True), ExprNodes.BoolNode(node.pos, value=False, constant_result=False)) def build_simple_switch_statement(self, node, common_var, conditions, not_in, true_val, false_val): result_ref = UtilNodes.ResultRefNode(node) true_body = Nodes.SingleAssignmentNode( node.pos, lhs=result_ref, rhs=true_val.coerce_to(node.type, self.current_env()), first=True) false_body = Nodes.SingleAssignmentNode( node.pos, lhs=result_ref, rhs=false_val.coerce_to(node.type, self.current_env()), first=True) if not_in: true_body, false_body = false_body, true_body cases = [Nodes.SwitchCaseNode(pos = node.pos, conditions = conditions, body = true_body)] common_var = unwrap_node(common_var) switch_node = Nodes.SwitchStatNode(pos = node.pos, test = common_var, cases = cases, else_clause = false_body) replacement = UtilNodes.TempResultFromStatNode(result_ref, switch_node) return replacement def visit_EvalWithTempExprNode(self, node): if not self.current_directives.get('optimize.use_switch'): self.visitchildren(node) return node # drop unused expression temp from FlattenInListTransform orig_expr = node.subexpression temp_ref = node.lazy_temp self.visitchildren(node) if node.subexpression is not orig_expr: # node was restructured => check if temp is still used if not Visitor.tree_contains(node.subexpression, temp_ref): return node.subexpression return node visit_Node = Visitor.VisitorTransform.recurse_to_children class FlattenInListTransform(Visitor.VisitorTransform, SkipDeclarations): """ This transformation flattens "x in [val1, ..., valn]" into a sequential list of comparisons. """ def visit_PrimaryCmpNode(self, node): self.visitchildren(node) if node.cascade is not None: return node elif node.operator == 'in': conjunction = 'or' eq_or_neq = '==' elif node.operator == 'not_in': conjunction = 'and' eq_or_neq = '!=' else: return node if not isinstance(node.operand2, (ExprNodes.TupleNode, ExprNodes.ListNode, ExprNodes.SetNode)): return node args = node.operand2.args if len(args) == 0: # note: lhs may have side effects return node if any([arg.is_starred for arg in args]): # Starred arguments do not directly translate to comparisons or "in" tests. return node lhs = UtilNodes.ResultRefNode(node.operand1) conds = [] temps = [] for arg in args: try: # Trial optimisation to avoid redundant temp # assignments. However, since is_simple() is meant to # be called after type analysis, we ignore any errors # and just play safe in that case. is_simple_arg = arg.is_simple() except Exception: is_simple_arg = False if not is_simple_arg: # must evaluate all non-simple RHS before doing the comparisons arg = UtilNodes.LetRefNode(arg) temps.append(arg) cond = ExprNodes.PrimaryCmpNode( pos = node.pos, operand1 = lhs, operator = eq_or_neq, operand2 = arg, cascade = None) conds.append(ExprNodes.TypecastNode( pos = node.pos, operand = cond, type = PyrexTypes.c_bint_type)) def concat(left, right): return ExprNodes.BoolBinopNode( pos = node.pos, operator = conjunction, operand1 = left, operand2 = right) condition = reduce(concat, conds) new_node = UtilNodes.EvalWithTempExprNode(lhs, condition) for temp in temps[::-1]: new_node = UtilNodes.EvalWithTempExprNode(temp, new_node) return new_node visit_Node = Visitor.VisitorTransform.recurse_to_children class DropRefcountingTransform(Visitor.VisitorTransform): """Drop ref-counting in safe places. """ visit_Node = Visitor.VisitorTransform.recurse_to_children def visit_ParallelAssignmentNode(self, node): """ Parallel swap assignments like 'a,b = b,a' are safe. """ left_names, right_names = [], [] left_indices, right_indices = [], [] temps = [] for stat in node.stats: if isinstance(stat, Nodes.SingleAssignmentNode): if not self._extract_operand(stat.lhs, left_names, left_indices, temps): return node if not self._extract_operand(stat.rhs, right_names, right_indices, temps): return node elif isinstance(stat, Nodes.CascadedAssignmentNode): # FIXME return node else: return node if left_names or right_names: # lhs/rhs names must be a non-redundant permutation lnames = [ path for path, n in left_names ] rnames = [ path for path, n in right_names ] if set(lnames) != set(rnames): return node if len(set(lnames)) != len(right_names): return node if left_indices or right_indices: # base name and index of index nodes must be a # non-redundant permutation lindices = [] for lhs_node in left_indices: index_id = self._extract_index_id(lhs_node) if not index_id: return node lindices.append(index_id) rindices = [] for rhs_node in right_indices: index_id = self._extract_index_id(rhs_node) if not index_id: return node rindices.append(index_id) if set(lindices) != set(rindices): return node if len(set(lindices)) != len(right_indices): return node # really supporting IndexNode requires support in # __Pyx_GetItemInt(), so let's stop short for now return node temp_args = [t.arg for t in temps] for temp in temps: temp.use_managed_ref = False for _, name_node in left_names + right_names: if name_node not in temp_args: name_node.use_managed_ref = False for index_node in left_indices + right_indices: index_node.use_managed_ref = False return node def _extract_operand(self, node, names, indices, temps): node = unwrap_node(node) if not node.type.is_pyobject: return False if isinstance(node, ExprNodes.CoerceToTempNode): temps.append(node) node = node.arg name_path = [] obj_node = node while obj_node.is_attribute: if obj_node.is_py_attr: return False name_path.append(obj_node.member) obj_node = obj_node.obj if obj_node.is_name: name_path.append(obj_node.name) names.append( ('.'.join(name_path[::-1]), node) ) elif node.is_subscript: if node.base.type != Builtin.list_type: return False if not node.index.type.is_int: return False if not node.base.is_name: return False indices.append(node) else: return False return True def _extract_index_id(self, index_node): base = index_node.base index = index_node.index if isinstance(index, ExprNodes.NameNode): index_val = index.name elif isinstance(index, ExprNodes.ConstNode): # FIXME: return None else: return None return (base.name, index_val) class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): """Optimize some common calls to builtin types *before* the type analysis phase and *after* the declarations analysis phase. This transform cannot make use of any argument types, but it can restructure the tree in a way that the type analysis phase can respond to. Introducing C function calls here may not be a good idea. Move them to the OptimizeBuiltinCalls transform instead, which runs after type analysis. """ # only intercept on call nodes visit_Node = Visitor.VisitorTransform.recurse_to_children def visit_SimpleCallNode(self, node): self.visitchildren(node) function = node.function if not self._function_is_builtin_name(function): return node return self._dispatch_to_handler(node, function, node.args) def visit_GeneralCallNode(self, node): self.visitchildren(node) function = node.function if not self._function_is_builtin_name(function): return node arg_tuple = node.positional_args if not isinstance(arg_tuple, ExprNodes.TupleNode): return node args = arg_tuple.args return self._dispatch_to_handler( node, function, args, node.keyword_args) def _function_is_builtin_name(self, function): if not function.is_name: return False env = self.current_env() entry = env.lookup(function.name) if entry is not env.builtin_scope().lookup_here(function.name): return False # if entry is None, it's at least an undeclared name, so likely builtin return True def _dispatch_to_handler(self, node, function, args, kwargs=None): if kwargs is None: handler_name = '_handle_simple_function_%s' % function.name else: handler_name = '_handle_general_function_%s' % function.name handle_call = getattr(self, handler_name, None) if handle_call is not None: if kwargs is None: return handle_call(node, args) else: return handle_call(node, args, kwargs) return node def _inject_capi_function(self, node, cname, func_type, utility_code=None): node.function = ExprNodes.PythonCapiFunctionNode( node.function.pos, node.function.name, cname, func_type, utility_code = utility_code) def _error_wrong_arg_count(self, function_name, node, args, expected=None): if not expected: # None or 0 arg_str = '' elif isinstance(expected, basestring) or expected > 1: arg_str = '...' elif expected == 1: arg_str = 'x' else: arg_str = '' if expected is not None: expected_str = 'expected %s, ' % expected else: expected_str = '' error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % ( function_name, arg_str, expected_str, len(args))) # specific handlers for simple call nodes def _handle_simple_function_float(self, node, pos_args): if not pos_args: return ExprNodes.FloatNode(node.pos, value='0.0') if len(pos_args) > 1: self._error_wrong_arg_count('float', node, pos_args, 1) arg_type = getattr(pos_args[0], 'type', None) if arg_type in (PyrexTypes.c_double_type, Builtin.float_type): return pos_args[0] return node def _handle_simple_function_slice(self, node, pos_args): arg_count = len(pos_args) start = step = None if arg_count == 1: stop, = pos_args elif arg_count == 2: start, stop = pos_args elif arg_count == 3: start, stop, step = pos_args else: self._error_wrong_arg_count('slice', node, pos_args) return node return ExprNodes.SliceNode( node.pos, start=start or ExprNodes.NoneNode(node.pos), stop=stop, step=step or ExprNodes.NoneNode(node.pos)) def _handle_simple_function_ord(self, node, pos_args): """Unpack ord('X'). """ if len(pos_args) != 1: return node arg = pos_args[0] if isinstance(arg, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)): if len(arg.value) == 1: return ExprNodes.IntNode( arg.pos, type=PyrexTypes.c_long_type, value=str(ord(arg.value)), constant_result=ord(arg.value) ) elif isinstance(arg, ExprNodes.StringNode): if arg.unicode_value and len(arg.unicode_value) == 1 \ and ord(arg.unicode_value) <= 255: # Py2/3 portability return ExprNodes.IntNode( arg.pos, type=PyrexTypes.c_int_type, value=str(ord(arg.unicode_value)), constant_result=ord(arg.unicode_value) ) return node # sequence processing def _handle_simple_function_all(self, node, pos_args): """Transform _result = all(p(x) for L in LL for x in L) into for L in LL: for x in L: if not p(x): return False else: return True """ return self._transform_any_all(node, pos_args, False) def _handle_simple_function_any(self, node, pos_args): """Transform _result = any(p(x) for L in LL for x in L) into for L in LL: for x in L: if p(x): return True else: return False """ return self._transform_any_all(node, pos_args, True) def _transform_any_all(self, node, pos_args, is_any): if len(pos_args) != 1: return node if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode): return node gen_expr_node = pos_args[0] generator_body = gen_expr_node.def_node.gbody loop_node = generator_body.body yield_expression, yield_stat_node = _find_single_yield_expression(loop_node) if yield_expression is None: return node if is_any: condition = yield_expression else: condition = ExprNodes.NotNode(yield_expression.pos, operand=yield_expression) test_node = Nodes.IfStatNode( yield_expression.pos, else_clause=None, if_clauses=[ Nodes.IfClauseNode( yield_expression.pos, condition=condition, body=Nodes.ReturnStatNode( node.pos, value=ExprNodes.BoolNode(yield_expression.pos, value=is_any, constant_result=is_any)) )] ) loop_node.else_clause = Nodes.ReturnStatNode( node.pos, value=ExprNodes.BoolNode(yield_expression.pos, value=not is_any, constant_result=not is_any)) Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, test_node) return ExprNodes.InlinedGeneratorExpressionNode( gen_expr_node.pos, gen=gen_expr_node, orig_func='any' if is_any else 'all') PySequence_List_func_type = PyrexTypes.CFuncType( Builtin.list_type, [PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)]) def _handle_simple_function_sorted(self, node, pos_args): """Transform sorted(genexpr) and sorted([listcomp]) into [listcomp].sort(). CPython just reads the iterable into a list and calls .sort() on it. Expanding the iterable in a listcomp is still faster and the result can be sorted in place. """ if len(pos_args) != 1: return node arg = pos_args[0] if isinstance(arg, ExprNodes.ComprehensionNode) and arg.type is Builtin.list_type: list_node = pos_args[0] loop_node = list_node.loop elif isinstance(arg, ExprNodes.GeneratorExpressionNode): gen_expr_node = arg loop_node = gen_expr_node.loop yield_statements = _find_yield_statements(loop_node) if not yield_statements: return node list_node = ExprNodes.InlinedGeneratorExpressionNode( node.pos, gen_expr_node, orig_func='sorted', comprehension_type=Builtin.list_type) for yield_expression, yield_stat_node in yield_statements: append_node = ExprNodes.ComprehensionAppendNode( yield_expression.pos, expr=yield_expression, target=list_node.target) Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node) elif arg.is_sequence_constructor: # sorted([a, b, c]) or sorted((a, b, c)). The result is always a list, # so starting off with a fresh one is more efficient. list_node = loop_node = arg.as_list() else: # Interestingly, PySequence_List works on a lot of non-sequence # things as well. list_node = loop_node = ExprNodes.PythonCapiCallNode( node.pos, "PySequence_List", self.PySequence_List_func_type, args=pos_args, is_temp=True) result_node = UtilNodes.ResultRefNode( pos=loop_node.pos, type=Builtin.list_type, may_hold_none=False) list_assign_node = Nodes.SingleAssignmentNode( node.pos, lhs=result_node, rhs=list_node, first=True) sort_method = ExprNodes.AttributeNode( node.pos, obj=result_node, attribute=EncodedString('sort'), # entry ? type ? needs_none_check=False) sort_node = Nodes.ExprStatNode( node.pos, expr=ExprNodes.SimpleCallNode( node.pos, function=sort_method, args=[])) sort_node.analyse_declarations(self.current_env()) return UtilNodes.TempResultFromStatNode( result_node, Nodes.StatListNode(node.pos, stats=[list_assign_node, sort_node])) def __handle_simple_function_sum(self, node, pos_args): """Transform sum(genexpr) into an equivalent inlined aggregation loop. """ if len(pos_args) not in (1,2): return node if not isinstance(pos_args[0], (ExprNodes.GeneratorExpressionNode, ExprNodes.ComprehensionNode)): return node gen_expr_node = pos_args[0] loop_node = gen_expr_node.loop if isinstance(gen_expr_node, ExprNodes.GeneratorExpressionNode): yield_expression, yield_stat_node = _find_single_yield_expression(loop_node) # FIXME: currently nonfunctional yield_expression = None if yield_expression is None: return node else: # ComprehensionNode yield_stat_node = gen_expr_node.append yield_expression = yield_stat_node.expr try: if not yield_expression.is_literal or not yield_expression.type.is_int: return node except AttributeError: return node # in case we don't have a type yet # special case: old Py2 backwards compatible "sum([int_const for ...])" # can safely be unpacked into a genexpr if len(pos_args) == 1: start = ExprNodes.IntNode(node.pos, value='0', constant_result=0) else: start = pos_args[1] result_ref = UtilNodes.ResultRefNode(pos=node.pos, type=PyrexTypes.py_object_type) add_node = Nodes.SingleAssignmentNode( yield_expression.pos, lhs = result_ref, rhs = ExprNodes.binop_node(node.pos, '+', result_ref, yield_expression) ) Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, add_node) exec_code = Nodes.StatListNode( node.pos, stats = [ Nodes.SingleAssignmentNode( start.pos, lhs = UtilNodes.ResultRefNode(pos=node.pos, expression=result_ref), rhs = start, first = True), loop_node ]) return ExprNodes.InlinedGeneratorExpressionNode( gen_expr_node.pos, loop = exec_code, result_node = result_ref, expr_scope = gen_expr_node.expr_scope, orig_func = 'sum', has_local_scope = gen_expr_node.has_local_scope) def _handle_simple_function_min(self, node, pos_args): return self._optimise_min_max(node, pos_args, '<') def _handle_simple_function_max(self, node, pos_args): return self._optimise_min_max(node, pos_args, '>') def _optimise_min_max(self, node, args, operator): """Replace min(a,b,...) and max(a,b,...) by explicit comparison code. """ if len(args) <= 1: if len(args) == 1 and args[0].is_sequence_constructor: args = args[0].args if len(args) <= 1: # leave this to Python return node cascaded_nodes = list(map(UtilNodes.ResultRefNode, args[1:])) last_result = args[0] for arg_node in cascaded_nodes: result_ref = UtilNodes.ResultRefNode(last_result) last_result = ExprNodes.CondExprNode( arg_node.pos, true_val = arg_node, false_val = result_ref, test = ExprNodes.PrimaryCmpNode( arg_node.pos, operand1 = arg_node, operator = operator, operand2 = result_ref, ) ) last_result = UtilNodes.EvalWithTempExprNode(result_ref, last_result) for ref_node in cascaded_nodes[::-1]: last_result = UtilNodes.EvalWithTempExprNode(ref_node, last_result) return last_result # builtin type creation def _DISABLED_handle_simple_function_tuple(self, node, pos_args): if not pos_args: return ExprNodes.TupleNode(node.pos, args=[], constant_result=()) # This is a bit special - for iterables (including genexps), # Python actually overallocates and resizes a newly created # tuple incrementally while reading items, which we can't # easily do without explicit node support. Instead, we read # the items into a list and then copy them into a tuple of the # final size. This takes up to twice as much memory, but will # have to do until we have real support for genexps. result = self._transform_list_set_genexpr(node, pos_args, Builtin.list_type) if result is not node: return ExprNodes.AsTupleNode(node.pos, arg=result) return node def _handle_simple_function_frozenset(self, node, pos_args): """Replace frozenset([...]) by frozenset((...)) as tuples are more efficient. """ if len(pos_args) != 1: return node if pos_args[0].is_sequence_constructor and not pos_args[0].args: del pos_args[0] elif isinstance(pos_args[0], ExprNodes.ListNode): pos_args[0] = pos_args[0].as_tuple() return node def _handle_simple_function_list(self, node, pos_args): if not pos_args: return ExprNodes.ListNode(node.pos, args=[], constant_result=[]) return self._transform_list_set_genexpr(node, pos_args, Builtin.list_type) def _handle_simple_function_set(self, node, pos_args): if not pos_args: return ExprNodes.SetNode(node.pos, args=[], constant_result=set()) return self._transform_list_set_genexpr(node, pos_args, Builtin.set_type) def _transform_list_set_genexpr(self, node, pos_args, target_type): """Replace set(genexpr) and list(genexpr) by an inlined comprehension. """ if len(pos_args) > 1: return node if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode): return node gen_expr_node = pos_args[0] loop_node = gen_expr_node.loop yield_statements = _find_yield_statements(loop_node) if not yield_statements: return node result_node = ExprNodes.InlinedGeneratorExpressionNode( node.pos, gen_expr_node, orig_func='set' if target_type is Builtin.set_type else 'list', comprehension_type=target_type) for yield_expression, yield_stat_node in yield_statements: append_node = ExprNodes.ComprehensionAppendNode( yield_expression.pos, expr=yield_expression, target=result_node.target) Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node) return result_node def _handle_simple_function_dict(self, node, pos_args): """Replace dict( (a,b) for ... ) by an inlined { a:b for ... } """ if len(pos_args) == 0: return ExprNodes.DictNode(node.pos, key_value_pairs=[], constant_result={}) if len(pos_args) > 1: return node if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode): return node gen_expr_node = pos_args[0] loop_node = gen_expr_node.loop yield_statements = _find_yield_statements(loop_node) if not yield_statements: return node for yield_expression, _ in yield_statements: if not isinstance(yield_expression, ExprNodes.TupleNode): return node if len(yield_expression.args) != 2: return node result_node = ExprNodes.InlinedGeneratorExpressionNode( node.pos, gen_expr_node, orig_func='dict', comprehension_type=Builtin.dict_type) for yield_expression, yield_stat_node in yield_statements: append_node = ExprNodes.DictComprehensionAppendNode( yield_expression.pos, key_expr=yield_expression.args[0], value_expr=yield_expression.args[1], target=result_node.target) Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node) return result_node # specific handlers for general call nodes def _handle_general_function_dict(self, node, pos_args, kwargs): """Replace dict(a=b,c=d,...) by the underlying keyword dict construction which is done anyway. """ if len(pos_args) > 0: return node if not isinstance(kwargs, ExprNodes.DictNode): return node return kwargs class InlineDefNodeCalls(Visitor.NodeRefCleanupMixin, Visitor.EnvTransform): visit_Node = Visitor.VisitorTransform.recurse_to_children def get_constant_value_node(self, name_node): if name_node.cf_state is None: return None if name_node.cf_state.cf_is_null: return None entry = self.current_env().lookup(name_node.name) if not entry or (not entry.cf_assignments or len(entry.cf_assignments) != 1): # not just a single assignment in all closures return None return entry.cf_assignments[0].rhs def visit_SimpleCallNode(self, node): self.visitchildren(node) if not self.current_directives.get('optimize.inline_defnode_calls'): return node function_name = node.function if not function_name.is_name: return node function = self.get_constant_value_node(function_name) if not isinstance(function, ExprNodes.PyCFunctionNode): return node inlined = ExprNodes.InlinedDefNodeCallNode( node.pos, function_name=function_name, function=function, args=node.args) if inlined.can_be_inlined(): return self.replace(node, inlined) return node class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, Visitor.MethodDispatcherTransform): """Optimize some common methods calls and instantiation patterns for builtin types *after* the type analysis phase. Running after type analysis, this transform can only perform function replacements that do not alter the function return type in a way that was not anticipated by the type analysis. """ ### cleanup to avoid redundant coercions to/from Python types def visit_PyTypeTestNode(self, node): """Flatten redundant type checks after tree changes. """ self.visitchildren(node) return node.reanalyse() def _visit_TypecastNode(self, node): # disabled - the user may have had a reason to put a type # cast, even if it looks redundant to Cython """ Drop redundant type casts. """ self.visitchildren(node) if node.type == node.operand.type: return node.operand return node def visit_ExprStatNode(self, node): """ Drop dead code and useless coercions. """ self.visitchildren(node) if isinstance(node.expr, ExprNodes.CoerceToPyTypeNode): node.expr = node.expr.arg expr = node.expr if expr is None or expr.is_none or expr.is_literal: # Expression was removed or is dead code => remove ExprStatNode as well. return None if expr.is_name and expr.entry and (expr.entry.is_local or expr.entry.is_arg): # Ignore dead references to local variables etc. return None return node def visit_CoerceToBooleanNode(self, node): """Drop redundant conversion nodes after tree changes. """ self.visitchildren(node) arg = node.arg if isinstance(arg, ExprNodes.PyTypeTestNode): arg = arg.arg if isinstance(arg, ExprNodes.CoerceToPyTypeNode): if arg.type in (PyrexTypes.py_object_type, Builtin.bool_type): return arg.arg.coerce_to_boolean(self.current_env()) return node PyNumber_Float_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None) ]) def visit_CoerceToPyTypeNode(self, node): """Drop redundant conversion nodes after tree changes.""" self.visitchildren(node) arg = node.arg if isinstance(arg, ExprNodes.CoerceFromPyTypeNode): arg = arg.arg if isinstance(arg, ExprNodes.PythonCapiCallNode): if arg.function.name == 'float' and len(arg.args) == 1: # undo redundant Py->C->Py coercion func_arg = arg.args[0] if func_arg.type is Builtin.float_type: return func_arg.as_none_safe_node("float() argument must be a string or a number, not 'NoneType'") elif func_arg.type.is_pyobject: return ExprNodes.PythonCapiCallNode( node.pos, '__Pyx_PyNumber_Float', self.PyNumber_Float_func_type, args=[func_arg], py_name='float', is_temp=node.is_temp, result_is_used=node.result_is_used, ).coerce_to(node.type, self.current_env()) return node def visit_CoerceFromPyTypeNode(self, node): """Drop redundant conversion nodes after tree changes. Also, optimise away calls to Python's builtin int() and float() if the result is going to be coerced back into a C type anyway. """ self.visitchildren(node) arg = node.arg if not arg.type.is_pyobject: # no Python conversion left at all, just do a C coercion instead if node.type != arg.type: arg = arg.coerce_to(node.type, self.current_env()) return arg if isinstance(arg, ExprNodes.PyTypeTestNode): arg = arg.arg if arg.is_literal: if (node.type.is_int and isinstance(arg, ExprNodes.IntNode) or node.type.is_float and isinstance(arg, ExprNodes.FloatNode) or node.type.is_int and isinstance(arg, ExprNodes.BoolNode)): return arg.coerce_to(node.type, self.current_env()) elif isinstance(arg, ExprNodes.CoerceToPyTypeNode): if arg.type is PyrexTypes.py_object_type: if node.type.assignable_from(arg.arg.type): # completely redundant C->Py->C coercion return arg.arg.coerce_to(node.type, self.current_env()) elif arg.type is Builtin.unicode_type: if arg.arg.type.is_unicode_char and node.type.is_unicode_char: return arg.arg.coerce_to(node.type, self.current_env()) elif isinstance(arg, ExprNodes.SimpleCallNode): if node.type.is_int or node.type.is_float: return self._optimise_numeric_cast_call(node, arg) elif arg.is_subscript: index_node = arg.index if isinstance(index_node, ExprNodes.CoerceToPyTypeNode): index_node = index_node.arg if index_node.type.is_int: return self._optimise_int_indexing(node, arg, index_node) return node PyBytes_GetItemInt_func_type = PyrexTypes.CFuncType( PyrexTypes.c_char_type, [ PyrexTypes.CFuncTypeArg("bytes", Builtin.bytes_type, None), PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("check_bounds", PyrexTypes.c_int_type, None), ], exception_value = "((char)-1)", exception_check = True) def _optimise_int_indexing(self, coerce_node, arg, index_node): env = self.current_env() bound_check_bool = env.directives['boundscheck'] and 1 or 0 if arg.base.type is Builtin.bytes_type: if coerce_node.type in (PyrexTypes.c_char_type, PyrexTypes.c_uchar_type): # bytes[index] -> char bound_check_node = ExprNodes.IntNode( coerce_node.pos, value=str(bound_check_bool), constant_result=bound_check_bool) node = ExprNodes.PythonCapiCallNode( coerce_node.pos, "__Pyx_PyBytes_GetItemInt", self.PyBytes_GetItemInt_func_type, args=[ arg.base.as_none_safe_node("'NoneType' object is not subscriptable"), index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env), bound_check_node, ], is_temp=True, utility_code=UtilityCode.load_cached( 'bytes_index', 'StringTools.c')) if coerce_node.type is not PyrexTypes.c_char_type: node = node.coerce_to(coerce_node.type, env) return node return coerce_node float_float_func_types = dict( (float_type, PyrexTypes.CFuncType( float_type, [ PyrexTypes.CFuncTypeArg("arg", float_type, None) ])) for float_type in (PyrexTypes.c_float_type, PyrexTypes.c_double_type, PyrexTypes.c_longdouble_type)) def _optimise_numeric_cast_call(self, node, arg): function = arg.function args = None if isinstance(arg, ExprNodes.PythonCapiCallNode): args = arg.args elif isinstance(function, ExprNodes.NameNode): if function.type.is_builtin_type and isinstance(arg.arg_tuple, ExprNodes.TupleNode): args = arg.arg_tuple.args if args is None or len(args) != 1: return node func_arg = args[0] if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode): func_arg = func_arg.arg elif func_arg.type.is_pyobject: # play it safe: Python conversion might work on all sorts of things return node if function.name == 'int': if func_arg.type.is_int or node.type.is_int: if func_arg.type == node.type: return func_arg elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float: return ExprNodes.TypecastNode(node.pos, operand=func_arg, type=node.type) elif func_arg.type.is_float and node.type.is_numeric: if func_arg.type.math_h_modifier == 'l': # Work around missing Cygwin definition. truncl = '__Pyx_truncl' else: truncl = 'trunc' + func_arg.type.math_h_modifier return ExprNodes.PythonCapiCallNode( node.pos, truncl, func_type=self.float_float_func_types[func_arg.type], args=[func_arg], py_name='int', is_temp=node.is_temp, result_is_used=node.result_is_used, ).coerce_to(node.type, self.current_env()) elif function.name == 'float': if func_arg.type.is_float or node.type.is_float: if func_arg.type == node.type: return func_arg elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float: return ExprNodes.TypecastNode( node.pos, operand=func_arg, type=node.type) return node def _error_wrong_arg_count(self, function_name, node, args, expected=None): if not expected: # None or 0 arg_str = '' elif isinstance(expected, basestring) or expected > 1: arg_str = '...' elif expected == 1: arg_str = 'x' else: arg_str = '' if expected is not None: expected_str = 'expected %s, ' % expected else: expected_str = '' error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % ( function_name, arg_str, expected_str, len(args))) ### generic fallbacks def _handle_function(self, node, function_name, function, arg_list, kwargs): return node def _handle_method(self, node, type_name, attr_name, function, arg_list, is_unbound_method, kwargs): """ Try to inject C-API calls for unbound method calls to builtin types. While the method declarations in Builtin.py already handle this, we can additionally resolve bound and unbound methods here that were assigned to variables ahead of time. """ if kwargs: return node if not function or not function.is_attribute or not function.obj.is_name: # cannot track unbound method calls over more than one indirection as # the names might have been reassigned in the meantime return node type_entry = self.current_env().lookup(type_name) if not type_entry: return node method = ExprNodes.AttributeNode( node.function.pos, obj=ExprNodes.NameNode( function.pos, name=type_name, entry=type_entry, type=type_entry.type), attribute=attr_name, is_called=True).analyse_as_type_attribute(self.current_env()) if method is None: return self._optimise_generic_builtin_method_call( node, attr_name, function, arg_list, is_unbound_method) args = node.args if args is None and node.arg_tuple: args = node.arg_tuple.args call_node = ExprNodes.SimpleCallNode( node.pos, function=method, args=args) if not is_unbound_method: call_node.self = function.obj call_node.analyse_c_function_call(self.current_env()) call_node.analysed = True return call_node.coerce_to(node.type, self.current_env()) ### builtin types def _optimise_generic_builtin_method_call(self, node, attr_name, function, arg_list, is_unbound_method): """ Try to inject an unbound method call for a call to a method of a known builtin type. This enables caching the underlying C function of the method at runtime. """ arg_count = len(arg_list) if is_unbound_method or arg_count >= 3 or not (function.is_attribute and function.is_py_attr): return node if not function.obj.type.is_builtin_type: return node if function.obj.type.name in ('basestring', 'type'): # these allow different actual types => unsafe return node return ExprNodes.CachedBuiltinMethodCallNode( node, function.obj, attr_name, arg_list) PyObject_Unicode_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) ]) def _handle_simple_function_unicode(self, node, function, pos_args): """Optimise single argument calls to unicode(). """ if len(pos_args) != 1: if len(pos_args) == 0: return ExprNodes.UnicodeNode(node.pos, value=EncodedString(), constant_result=u'') return node arg = pos_args[0] if arg.type is Builtin.unicode_type: if not arg.may_be_none(): return arg cname = "__Pyx_PyUnicode_Unicode" utility_code = UtilityCode.load_cached('PyUnicode_Unicode', 'StringTools.c') else: cname = "__Pyx_PyObject_Unicode" utility_code = UtilityCode.load_cached('PyObject_Unicode', 'StringTools.c') return ExprNodes.PythonCapiCallNode( node.pos, cname, self.PyObject_Unicode_func_type, args=pos_args, is_temp=node.is_temp, utility_code=utility_code, py_name="unicode") def visit_FormattedValueNode(self, node): """Simplify or avoid plain string formatting of a unicode value. This seems misplaced here, but plain unicode formatting is essentially a call to the unicode() builtin, which is optimised right above. """ self.visitchildren(node) if node.value.type is Builtin.unicode_type and not node.c_format_spec and not node.format_spec: if not node.conversion_char or node.conversion_char == 's': # value is definitely a unicode string and we don't format it any special return self._handle_simple_function_unicode(node, None, [node.value]) return node PyDict_Copy_func_type = PyrexTypes.CFuncType( Builtin.dict_type, [ PyrexTypes.CFuncTypeArg("dict", Builtin.dict_type, None) ]) def _handle_simple_function_dict(self, node, function, pos_args): """Replace dict(some_dict) by PyDict_Copy(some_dict). """ if len(pos_args) != 1: return node arg = pos_args[0] if arg.type is Builtin.dict_type: arg = arg.as_none_safe_node("'NoneType' is not iterable") return ExprNodes.PythonCapiCallNode( node.pos, "PyDict_Copy", self.PyDict_Copy_func_type, args = [arg], is_temp = node.is_temp ) return node PySequence_List_func_type = PyrexTypes.CFuncType( Builtin.list_type, [PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)]) def _handle_simple_function_list(self, node, function, pos_args): """Turn list(ob) into PySequence_List(ob). """ if len(pos_args) != 1: return node arg = pos_args[0] return ExprNodes.PythonCapiCallNode( node.pos, "PySequence_List", self.PySequence_List_func_type, args=pos_args, is_temp=node.is_temp) PyList_AsTuple_func_type = PyrexTypes.CFuncType( Builtin.tuple_type, [ PyrexTypes.CFuncTypeArg("list", Builtin.list_type, None) ]) def _handle_simple_function_tuple(self, node, function, pos_args): """Replace tuple([...]) by PyList_AsTuple or PySequence_Tuple. """ if len(pos_args) != 1 or not node.is_temp: return node arg = pos_args[0] if arg.type is Builtin.tuple_type and not arg.may_be_none(): return arg if arg.type is Builtin.list_type: pos_args[0] = arg.as_none_safe_node( "'NoneType' object is not iterable") return ExprNodes.PythonCapiCallNode( node.pos, "PyList_AsTuple", self.PyList_AsTuple_func_type, args=pos_args, is_temp=node.is_temp) else: return ExprNodes.AsTupleNode(node.pos, arg=arg, type=Builtin.tuple_type) PySet_New_func_type = PyrexTypes.CFuncType( Builtin.set_type, [ PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None) ]) def _handle_simple_function_set(self, node, function, pos_args): if len(pos_args) != 1: return node if pos_args[0].is_sequence_constructor: # We can optimise set([x,y,z]) safely into a set literal, # but only if we create all items before adding them - # adding an item may raise an exception if it is not # hashable, but creating the later items may have # side-effects. args = [] temps = [] for arg in pos_args[0].args: if not arg.is_simple(): arg = UtilNodes.LetRefNode(arg) temps.append(arg) args.append(arg) result = ExprNodes.SetNode(node.pos, is_temp=1, args=args) self.replace(node, result) for temp in temps[::-1]: result = UtilNodes.EvalWithTempExprNode(temp, result) return result else: # PySet_New(it) is better than a generic Python call to set(it) return self.replace(node, ExprNodes.PythonCapiCallNode( node.pos, "PySet_New", self.PySet_New_func_type, args=pos_args, is_temp=node.is_temp, py_name="set")) PyFrozenSet_New_func_type = PyrexTypes.CFuncType( Builtin.frozenset_type, [ PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None) ]) def _handle_simple_function_frozenset(self, node, function, pos_args): if not pos_args: pos_args = [ExprNodes.NullNode(node.pos)] elif len(pos_args) > 1: return node elif pos_args[0].type is Builtin.frozenset_type and not pos_args[0].may_be_none(): return pos_args[0] # PyFrozenSet_New(it) is better than a generic Python call to frozenset(it) return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_PyFrozenSet_New", self.PyFrozenSet_New_func_type, args=pos_args, is_temp=node.is_temp, utility_code=UtilityCode.load_cached('pyfrozenset_new', 'Builtins.c'), py_name="frozenset") PyObject_AsDouble_func_type = PyrexTypes.CFuncType( PyrexTypes.c_double_type, [ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None), ], exception_value = "((double)-1)", exception_check = True) def _handle_simple_function_float(self, node, function, pos_args): """Transform float() into either a C type cast or a faster C function call. """ # Note: this requires the float() function to be typed as # returning a C 'double' if len(pos_args) == 0: return ExprNodes.FloatNode( node, value="0.0", constant_result=0.0 ).coerce_to(Builtin.float_type, self.current_env()) elif len(pos_args) != 1: self._error_wrong_arg_count('float', node, pos_args, '0 or 1') return node func_arg = pos_args[0] if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode): func_arg = func_arg.arg if func_arg.type is PyrexTypes.c_double_type: return func_arg elif node.type.assignable_from(func_arg.type) or func_arg.type.is_numeric: return ExprNodes.TypecastNode( node.pos, operand=func_arg, type=node.type) return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_PyObject_AsDouble", self.PyObject_AsDouble_func_type, args = pos_args, is_temp = node.is_temp, utility_code = load_c_utility('pyobject_as_double'), py_name = "float") PyNumber_Int_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None) ]) PyInt_FromDouble_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_double_type, None) ]) def _handle_simple_function_int(self, node, function, pos_args): """Transform int() into a faster C function call. """ if len(pos_args) == 0: return ExprNodes.IntNode(node.pos, value="0", constant_result=0, type=PyrexTypes.py_object_type) elif len(pos_args) != 1: return node # int(x, base) func_arg = pos_args[0] if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode): if func_arg.arg.type.is_float: return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_PyInt_FromDouble", self.PyInt_FromDouble_func_type, args=[func_arg.arg], is_temp=True, py_name='int', utility_code=UtilityCode.load_cached("PyIntFromDouble", "TypeConversion.c")) else: return node # handled in visit_CoerceFromPyTypeNode() if func_arg.type.is_pyobject and node.type.is_pyobject: return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_PyNumber_Int", self.PyNumber_Int_func_type, args=pos_args, is_temp=True, py_name='int') return node def _handle_simple_function_bool(self, node, function, pos_args): """Transform bool(x) into a type coercion to a boolean. """ if len(pos_args) == 0: return ExprNodes.BoolNode( node.pos, value=False, constant_result=False ).coerce_to(Builtin.bool_type, self.current_env()) elif len(pos_args) != 1: self._error_wrong_arg_count('bool', node, pos_args, '0 or 1') return node else: # => !!(x) to make sure it's exactly 0 or 1 operand = pos_args[0].coerce_to_boolean(self.current_env()) operand = ExprNodes.NotNode(node.pos, operand = operand) operand = ExprNodes.NotNode(node.pos, operand = operand) # coerce back to Python object as that's the result we are expecting return operand.coerce_to_pyobject(self.current_env()) ### builtin functions Pyx_strlen_func_type = PyrexTypes.CFuncType( PyrexTypes.c_size_t_type, [ PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None) ]) Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType( PyrexTypes.c_size_t_type, [ PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_const_py_unicode_ptr_type, None) ]) PyObject_Size_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) ], exception_value="-1") _map_to_capi_len_function = { Builtin.unicode_type: "__Pyx_PyUnicode_GET_LENGTH", Builtin.bytes_type: "PyBytes_GET_SIZE", Builtin.bytearray_type: 'PyByteArray_GET_SIZE', Builtin.list_type: "PyList_GET_SIZE", Builtin.tuple_type: "PyTuple_GET_SIZE", Builtin.set_type: "PySet_GET_SIZE", Builtin.frozenset_type: "PySet_GET_SIZE", Builtin.dict_type: "PyDict_Size", }.get _ext_types_with_pysize = set(["cpython.array.array"]) def _handle_simple_function_len(self, node, function, pos_args): """Replace len(char*) by the equivalent call to strlen(), len(Py_UNICODE) by the equivalent Py_UNICODE_strlen() and len(known_builtin_type) by an equivalent C-API call. """ if len(pos_args) != 1: self._error_wrong_arg_count('len', node, pos_args, 1) return node arg = pos_args[0] if isinstance(arg, ExprNodes.CoerceToPyTypeNode): arg = arg.arg if arg.type.is_string: new_node = ExprNodes.PythonCapiCallNode( node.pos, "strlen", self.Pyx_strlen_func_type, args = [arg], is_temp = node.is_temp, utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c")) elif arg.type.is_pyunicode_ptr: new_node = ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_Py_UNICODE_strlen", self.Pyx_Py_UNICODE_strlen_func_type, args = [arg], is_temp = node.is_temp) elif arg.type.is_memoryviewslice: func_type = PyrexTypes.CFuncType( PyrexTypes.c_size_t_type, [ PyrexTypes.CFuncTypeArg("memoryviewslice", arg.type, None) ], nogil=True) new_node = ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_MemoryView_Len", func_type, args=[arg], is_temp=node.is_temp) elif arg.type.is_pyobject: cfunc_name = self._map_to_capi_len_function(arg.type) if cfunc_name is None: arg_type = arg.type if ((arg_type.is_extension_type or arg_type.is_builtin_type) and arg_type.entry.qualified_name in self._ext_types_with_pysize): cfunc_name = 'Py_SIZE' else: return node arg = arg.as_none_safe_node( "object of type 'NoneType' has no len()") new_node = ExprNodes.PythonCapiCallNode( node.pos, cfunc_name, self.PyObject_Size_func_type, args=[arg], is_temp=node.is_temp) elif arg.type.is_unicode_char: return ExprNodes.IntNode(node.pos, value='1', constant_result=1, type=node.type) else: return node if node.type not in (PyrexTypes.c_size_t_type, PyrexTypes.c_py_ssize_t_type): new_node = new_node.coerce_to(node.type, self.current_env()) return new_node Pyx_Type_func_type = PyrexTypes.CFuncType( Builtin.type_type, [ PyrexTypes.CFuncTypeArg("object", PyrexTypes.py_object_type, None) ]) def _handle_simple_function_type(self, node, function, pos_args): """Replace type(o) by a macro call to Py_TYPE(o). """ if len(pos_args) != 1: return node node = ExprNodes.PythonCapiCallNode( node.pos, "Py_TYPE", self.Pyx_Type_func_type, args = pos_args, is_temp = False) return ExprNodes.CastNode(node, PyrexTypes.py_object_type) Py_type_check_func_type = PyrexTypes.CFuncType( PyrexTypes.c_bint_type, [ PyrexTypes.CFuncTypeArg("arg", PyrexTypes.py_object_type, None) ]) def _handle_simple_function_isinstance(self, node, function, pos_args): """Replace isinstance() checks against builtin types by the corresponding C-API call. """ if len(pos_args) != 2: return node arg, types = pos_args temps = [] if isinstance(types, ExprNodes.TupleNode): types = types.args if len(types) == 1 and not types[0].type is Builtin.type_type: return node # nothing to improve here if arg.is_attribute or not arg.is_simple(): arg = UtilNodes.ResultRefNode(arg) temps.append(arg) elif types.type is Builtin.type_type: types = [types] else: return node tests = [] test_nodes = [] env = self.current_env() for test_type_node in types: builtin_type = None if test_type_node.is_name: if test_type_node.entry: entry = env.lookup(test_type_node.entry.name) if entry and entry.type and entry.type.is_builtin_type: builtin_type = entry.type if builtin_type is Builtin.type_type: # all types have type "type", but there's only one 'type' if entry.name != 'type' or not ( entry.scope and entry.scope.is_builtin_scope): builtin_type = None if builtin_type is not None: type_check_function = entry.type.type_check_function(exact=False) if type_check_function in tests: continue tests.append(type_check_function) type_check_args = [arg] elif test_type_node.type is Builtin.type_type: type_check_function = '__Pyx_TypeCheck' type_check_args = [arg, test_type_node] else: if not test_type_node.is_literal: test_type_node = UtilNodes.ResultRefNode(test_type_node) temps.append(test_type_node) type_check_function = 'PyObject_IsInstance' type_check_args = [arg, test_type_node] test_nodes.append( ExprNodes.PythonCapiCallNode( test_type_node.pos, type_check_function, self.Py_type_check_func_type, args=type_check_args, is_temp=True, )) def join_with_or(a, b, make_binop_node=ExprNodes.binop_node): or_node = make_binop_node(node.pos, 'or', a, b) or_node.type = PyrexTypes.c_bint_type or_node.wrap_operands(env) return or_node test_node = reduce(join_with_or, test_nodes).coerce_to(node.type, env) for temp in temps[::-1]: test_node = UtilNodes.EvalWithTempExprNode(temp, test_node) return test_node def _handle_simple_function_ord(self, node, function, pos_args): """Unpack ord(Py_UNICODE) and ord('X'). """ if len(pos_args) != 1: return node arg = pos_args[0] if isinstance(arg, ExprNodes.CoerceToPyTypeNode): if arg.arg.type.is_unicode_char: return ExprNodes.TypecastNode( arg.pos, operand=arg.arg, type=PyrexTypes.c_long_type ).coerce_to(node.type, self.current_env()) elif isinstance(arg, ExprNodes.UnicodeNode): if len(arg.value) == 1: return ExprNodes.IntNode( arg.pos, type=PyrexTypes.c_int_type, value=str(ord(arg.value)), constant_result=ord(arg.value) ).coerce_to(node.type, self.current_env()) elif isinstance(arg, ExprNodes.StringNode): if arg.unicode_value and len(arg.unicode_value) == 1 \ and ord(arg.unicode_value) <= 255: # Py2/3 portability return ExprNodes.IntNode( arg.pos, type=PyrexTypes.c_int_type, value=str(ord(arg.unicode_value)), constant_result=ord(arg.unicode_value) ).coerce_to(node.type, self.current_env()) return node ### special methods Pyx_tp_new_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("type", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("args", Builtin.tuple_type, None), ]) Pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("type", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("args", Builtin.tuple_type, None), PyrexTypes.CFuncTypeArg("kwargs", Builtin.dict_type, None), ]) def _handle_any_slot__new__(self, node, function, args, is_unbound_method, kwargs=None): """Replace 'exttype.__new__(exttype, ...)' by a call to exttype->tp_new() """ obj = function.obj if not is_unbound_method or len(args) < 1: return node type_arg = args[0] if not obj.is_name or not type_arg.is_name: # play safe return node if obj.type != Builtin.type_type or type_arg.type != Builtin.type_type: # not a known type, play safe return node if not type_arg.type_entry or not obj.type_entry: if obj.name != type_arg.name: return node # otherwise, we know it's a type and we know it's the same # type for both - that should do elif type_arg.type_entry != obj.type_entry: # different types - may or may not lead to an error at runtime return node args_tuple = ExprNodes.TupleNode(node.pos, args=args[1:]) args_tuple = args_tuple.analyse_types( self.current_env(), skip_children=True) if type_arg.type_entry: ext_type = type_arg.type_entry.type if (ext_type.is_extension_type and ext_type.typeobj_cname and ext_type.scope.global_scope() == self.current_env().global_scope()): # known type in current module tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__') slot_func_cname = TypeSlots.get_slot_function(ext_type.scope, tp_slot) if slot_func_cname: cython_scope = self.context.cython_scope PyTypeObjectPtr = PyrexTypes.CPtrType( cython_scope.lookup('PyTypeObject').type) pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType( ext_type, [ PyrexTypes.CFuncTypeArg("type", PyTypeObjectPtr, None), PyrexTypes.CFuncTypeArg("args", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("kwargs", PyrexTypes.py_object_type, None), ]) type_arg = ExprNodes.CastNode(type_arg, PyTypeObjectPtr) if not kwargs: kwargs = ExprNodes.NullNode(node.pos, type=PyrexTypes.py_object_type) # hack? return ExprNodes.PythonCapiCallNode( node.pos, slot_func_cname, pyx_tp_new_kwargs_func_type, args=[type_arg, args_tuple, kwargs], may_return_none=False, is_temp=True) else: # arbitrary variable, needs a None check for safety type_arg = type_arg.as_none_safe_node( "object.__new__(X): X is not a type object (NoneType)") utility_code = UtilityCode.load_cached('tp_new', 'ObjectHandling.c') if kwargs: return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_tp_new_kwargs", self.Pyx_tp_new_kwargs_func_type, args=[type_arg, args_tuple, kwargs], utility_code=utility_code, is_temp=node.is_temp ) else: return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_tp_new", self.Pyx_tp_new_func_type, args=[type_arg, args_tuple], utility_code=utility_code, is_temp=node.is_temp ) ### methods of builtin types PyObject_Append_func_type = PyrexTypes.CFuncType( PyrexTypes.c_returncode_type, [ PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("item", PyrexTypes.py_object_type, None), ], exception_value="-1") def _handle_simple_method_object_append(self, node, function, args, is_unbound_method): """Optimistic optimisation as X.append() is almost always referring to a list. """ if len(args) != 2 or node.result_is_used: return node return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_PyObject_Append", self.PyObject_Append_func_type, args=args, may_return_none=False, is_temp=node.is_temp, result_is_used=False, utility_code=load_c_utility('append') ) def _handle_simple_method_list_extend(self, node, function, args, is_unbound_method): """Replace list.extend([...]) for short sequence literals values by sequential appends to avoid creating an intermediate sequence argument. """ if len(args) != 2: return node obj, value = args if not value.is_sequence_constructor: return node items = list(value.args) if value.mult_factor is not None or len(items) > 8: # Appending wins for short sequences but slows down when multiple resize operations are needed. # This seems to be a good enough limit that avoids repeated resizing. if False and isinstance(value, ExprNodes.ListNode): # One would expect that tuples are more efficient here, but benchmarking with # Py3.5 and Py3.7 suggests that they are not. Probably worth revisiting at some point. # Might be related to the usage of PySequence_FAST() in CPython's list.extend(), # which is probably tuned more towards lists than tuples (and rightly so). tuple_node = args[1].as_tuple().analyse_types(self.current_env(), skip_children=True) Visitor.recursively_replace_node(node, args[1], tuple_node) return node wrapped_obj = self._wrap_self_arg(obj, function, is_unbound_method, 'extend') if not items: # Empty sequences are not likely to occur, but why waste a call to list.extend() for them? wrapped_obj.result_is_used = node.result_is_used return wrapped_obj cloned_obj = obj = wrapped_obj if len(items) > 1 and not obj.is_simple(): cloned_obj = UtilNodes.LetRefNode(obj) # Use ListComp_Append() for all but the last item and finish with PyList_Append() # to shrink the list storage size at the very end if necessary. temps = [] arg = items[-1] if not arg.is_simple(): arg = UtilNodes.LetRefNode(arg) temps.append(arg) new_node = ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_PyList_Append", self.PyObject_Append_func_type, args=[cloned_obj, arg], is_temp=True, utility_code=load_c_utility("ListAppend")) for arg in items[-2::-1]: if not arg.is_simple(): arg = UtilNodes.LetRefNode(arg) temps.append(arg) new_node = ExprNodes.binop_node( node.pos, '|', ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_ListComp_Append", self.PyObject_Append_func_type, args=[cloned_obj, arg], py_name="extend", is_temp=True, utility_code=load_c_utility("ListCompAppend")), new_node, type=PyrexTypes.c_returncode_type, ) new_node.result_is_used = node.result_is_used if cloned_obj is not obj: temps.append(cloned_obj) for temp in temps: new_node = UtilNodes.EvalWithTempExprNode(temp, new_node) new_node.result_is_used = node.result_is_used return new_node PyByteArray_Append_func_type = PyrexTypes.CFuncType( PyrexTypes.c_returncode_type, [ PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_int_type, None), ], exception_value="-1") PyByteArray_AppendObject_func_type = PyrexTypes.CFuncType( PyrexTypes.c_returncode_type, [ PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("value", PyrexTypes.py_object_type, None), ], exception_value="-1") def _handle_simple_method_bytearray_append(self, node, function, args, is_unbound_method): if len(args) != 2: return node func_name = "__Pyx_PyByteArray_Append" func_type = self.PyByteArray_Append_func_type value = unwrap_coerced_node(args[1]) if value.type.is_int or isinstance(value, ExprNodes.IntNode): value = value.coerce_to(PyrexTypes.c_int_type, self.current_env()) utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c") elif value.is_string_literal: if not value.can_coerce_to_char_literal(): return node value = value.coerce_to(PyrexTypes.c_char_type, self.current_env()) utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c") elif value.type.is_pyobject: func_name = "__Pyx_PyByteArray_AppendObject" func_type = self.PyByteArray_AppendObject_func_type utility_code = UtilityCode.load_cached("ByteArrayAppendObject", "StringTools.c") else: return node new_node = ExprNodes.PythonCapiCallNode( node.pos, func_name, func_type, args=[args[0], value], may_return_none=False, is_temp=node.is_temp, utility_code=utility_code, ) if node.result_is_used: new_node = new_node.coerce_to(node.type, self.current_env()) return new_node PyObject_Pop_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None), ]) PyObject_PopIndex_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("py_index", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("c_index", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("is_signed", PyrexTypes.c_int_type, None), ], has_varargs=True) # to fake the additional macro args that lack a proper C type def _handle_simple_method_list_pop(self, node, function, args, is_unbound_method): return self._handle_simple_method_object_pop( node, function, args, is_unbound_method, is_list=True) def _handle_simple_method_object_pop(self, node, function, args, is_unbound_method, is_list=False): """Optimistic optimisation as X.pop([n]) is almost always referring to a list. """ if not args: return node obj = args[0] if is_list: type_name = 'List' obj = obj.as_none_safe_node( "'NoneType' object has no attribute '%.30s'", error="PyExc_AttributeError", format_args=['pop']) else: type_name = 'Object' if len(args) == 1: return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_Py%s_Pop" % type_name, self.PyObject_Pop_func_type, args=[obj], may_return_none=True, is_temp=node.is_temp, utility_code=load_c_utility('pop'), ) elif len(args) == 2: index = unwrap_coerced_node(args[1]) py_index = ExprNodes.NoneNode(index.pos) orig_index_type = index.type if not index.type.is_int: if isinstance(index, ExprNodes.IntNode): py_index = index.coerce_to_pyobject(self.current_env()) index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) elif is_list: if index.type.is_pyobject: py_index = index.coerce_to_simple(self.current_env()) index = ExprNodes.CloneNode(py_index) index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) else: return node elif not PyrexTypes.numeric_type_fits(index.type, PyrexTypes.c_py_ssize_t_type): return node elif isinstance(index, ExprNodes.IntNode): py_index = index.coerce_to_pyobject(self.current_env()) # real type might still be larger at runtime if not orig_index_type.is_int: orig_index_type = index.type if not orig_index_type.create_to_py_utility_code(self.current_env()): return node convert_func = orig_index_type.to_py_function conversion_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [PyrexTypes.CFuncTypeArg("intval", orig_index_type, None)]) return ExprNodes.PythonCapiCallNode( node.pos, "__Pyx_Py%s_PopIndex" % type_name, self.PyObject_PopIndex_func_type, args=[obj, py_index, index, ExprNodes.IntNode(index.pos, value=str(orig_index_type.signed and 1 or 0), constant_result=orig_index_type.signed and 1 or 0, type=PyrexTypes.c_int_type), ExprNodes.RawCNameExprNode(index.pos, PyrexTypes.c_void_type, orig_index_type.empty_declaration_code()), ExprNodes.RawCNameExprNode(index.pos, conversion_type, convert_func)], may_return_none=True, is_temp=node.is_temp, utility_code=load_c_utility("pop_index"), ) return node single_param_func_type = PyrexTypes.CFuncType( PyrexTypes.c_returncode_type, [ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None), ], exception_value = "-1") def _handle_simple_method_list_sort(self, node, function, args, is_unbound_method): """Call PyList_Sort() instead of the 0-argument l.sort(). """ if len(args) != 1: return node return self._substitute_method_call( node, function, "PyList_Sort", self.single_param_func_type, 'sort', is_unbound_method, args).coerce_to(node.type, self.current_env) Pyx_PyDict_GetItem_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None), ]) def _handle_simple_method_dict_get(self, node, function, args, is_unbound_method): """Replace dict.get() by a call to PyDict_GetItem(). """ if len(args) == 2: args.append(ExprNodes.NoneNode(node.pos)) elif len(args) != 3: self._error_wrong_arg_count('dict.get', node, args, "2 or 3") return node return self._substitute_method_call( node, function, "__Pyx_PyDict_GetItemDefault", self.Pyx_PyDict_GetItem_func_type, 'get', is_unbound_method, args, may_return_none = True, utility_code = load_c_utility("dict_getitem_default")) Pyx_PyDict_SetDefault_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("is_safe_type", PyrexTypes.c_int_type, None), ]) def _handle_simple_method_dict_setdefault(self, node, function, args, is_unbound_method): """Replace dict.setdefault() by calls to PyDict_GetItem() and PyDict_SetItem(). """ if len(args) == 2: args.append(ExprNodes.NoneNode(node.pos)) elif len(args) != 3: self._error_wrong_arg_count('dict.setdefault', node, args, "2 or 3") return node key_type = args[1].type if key_type.is_builtin_type: is_safe_type = int(key_type.name in 'str bytes unicode float int long bool') elif key_type is PyrexTypes.py_object_type: is_safe_type = -1 # don't know else: is_safe_type = 0 # definitely not args.append(ExprNodes.IntNode( node.pos, value=str(is_safe_type), constant_result=is_safe_type)) return self._substitute_method_call( node, function, "__Pyx_PyDict_SetDefault", self.Pyx_PyDict_SetDefault_func_type, 'setdefault', is_unbound_method, args, may_return_none=True, utility_code=load_c_utility('dict_setdefault')) PyDict_Pop_func_type = PyrexTypes.CFuncType( PyrexTypes.py_object_type, [ PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None), ]) def _handle_simple_method_dict_pop(self, node, function, args, is_unbound_method): """Replace dict.pop() by a call to _PyDict_Pop(). """ if len(args) == 2: args.append(ExprNodes.NullNode(node.pos)) elif len(args) != 3: self._error_wrong_arg_count('dict.pop', node, args, "2 or 3") return node return self._substitute_method_call( node, function, "__Pyx_PyDict_Pop", self.PyDict_Pop_func_type, 'pop', is_unbound_method, args, may_return_none=True, utility_code=load_c_utility('py_dict_pop')) Pyx_BinopInt_func_types = dict( ((ctype, ret_type), PyrexTypes.CFuncType( ret_type, [ PyrexTypes.CFuncTypeArg("op1", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("op2", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("cval", ctype, None), PyrexTypes.CFuncTypeArg("inplace", PyrexTypes.c_bint_type, None), PyrexTypes.CFuncTypeArg("zerodiv_check", PyrexTypes.c_bint_type, None), ], exception_value=None if ret_type.is_pyobject else ret_type.exception_value)) for ctype in (PyrexTypes.c_long_type, PyrexTypes.c_double_type) for ret_type in (PyrexTypes.py_object_type, PyrexTypes.c_bint_type) ) def _handle_simple_method_object___add__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Add', node, function, args, is_unbound_method) def _handle_simple_method_object___sub__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method) def _handle_simple_method_object___eq__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Eq', node, function, args, is_unbound_method) def _handle_simple_method_object___ne__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Ne', node, function, args, is_unbound_method) def _handle_simple_method_object___and__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('And', node, function, args, is_unbound_method) def _handle_simple_method_object___or__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Or', node, function, args, is_unbound_method) def _handle_simple_method_object___xor__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Xor', node, function, args, is_unbound_method) def _handle_simple_method_object___rshift__(self, node, function, args, is_unbound_method): if len(args) != 2 or not isinstance(args[1], ExprNodes.IntNode): return node if not args[1].has_constant_result() or not (1 <= args[1].constant_result <= 63): return node return self._optimise_num_binop('Rshift', node, function, args, is_unbound_method) def _handle_simple_method_object___lshift__(self, node, function, args, is_unbound_method): if len(args) != 2 or not isinstance(args[1], ExprNodes.IntNode): return node if not args[1].has_constant_result() or not (1 <= args[1].constant_result <= 63): return node return self._optimise_num_binop('Lshift', node, function, args, is_unbound_method) def _handle_simple_method_object___mod__(self, node, function, args, is_unbound_method): return self._optimise_num_div('Remainder', node, function, args, is_unbound_method) def _handle_simple_method_object___floordiv__(self, node, function, args, is_unbound_method): return self._optimise_num_div('FloorDivide', node, function, args, is_unbound_method) def _handle_simple_method_object___truediv__(self, node, function, args, is_unbound_method): return self._optimise_num_div('TrueDivide', node, function, args, is_unbound_method) def _handle_simple_method_object___div__(self, node, function, args, is_unbound_method): return self._optimise_num_div('Divide', node, function, args, is_unbound_method) def _optimise_num_div(self, operator, node, function, args, is_unbound_method): if len(args) != 2 or not args[1].has_constant_result() or args[1].constant_result == 0: return node if isinstance(args[1], ExprNodes.IntNode): if not (-2**30 <= args[1].constant_result <= 2**30): return node elif isinstance(args[1], ExprNodes.FloatNode): if not (-2**53 <= args[1].constant_result <= 2**53): return node else: return node return self._optimise_num_binop(operator, node, function, args, is_unbound_method) def _handle_simple_method_float___add__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Add', node, function, args, is_unbound_method) def _handle_simple_method_float___sub__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method) def _handle_simple_method_float___truediv__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('TrueDivide', node, function, args, is_unbound_method) def _handle_simple_method_float___div__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Divide', node, function, args, is_unbound_method) def _handle_simple_method_float___mod__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Remainder', node, function, args, is_unbound_method) def _handle_simple_method_float___eq__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Eq', node, function, args, is_unbound_method) def _handle_simple_method_float___ne__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Ne', node, function, args, is_unbound_method) def _optimise_num_binop(self, operator, node, function, args, is_unbound_method): """ Optimise math operators for (likely) float or small integer operations. """ if len(args) != 2: return node if node.type.is_pyobject: ret_type = PyrexTypes.py_object_type elif node.type is PyrexTypes.c_bint_type and operator in ('Eq', 'Ne'): ret_type = PyrexTypes.c_bint_type else: return node # When adding IntNode/FloatNode to something else, assume other operand is also numeric. # Prefer constants on RHS as they allows better size control for some operators. num_nodes = (ExprNodes.IntNode, ExprNodes.FloatNode) if isinstance(args[1], num_nodes): if args[0].type is not PyrexTypes.py_object_type: return node numval = args[1] arg_order = 'ObjC' elif isinstance(args[0], num_nodes): if args[1].type is not PyrexTypes.py_object_type: return node numval = args[0] arg_order = 'CObj' else: return node if not numval.has_constant_result(): return node is_float = isinstance(numval, ExprNodes.FloatNode) num_type = PyrexTypes.c_double_type if is_float else PyrexTypes.c_long_type if is_float: if operator not in ('Add', 'Subtract', 'Remainder', 'TrueDivide', 'Divide', 'Eq', 'Ne'): return node elif operator == 'Divide': # mixed old-/new-style division is not currently optimised for integers return node elif abs(numval.constant_result) > 2**30: # Cut off at an integer border that is still safe for all operations. return node if operator in ('TrueDivide', 'FloorDivide', 'Divide', 'Remainder'): if args[1].constant_result == 0: # Don't optimise division by 0. :) return node args = list(args) args.append((ExprNodes.FloatNode if is_float else ExprNodes.IntNode)( numval.pos, value=numval.value, constant_result=numval.constant_result, type=num_type)) inplace = node.inplace if isinstance(node, ExprNodes.NumBinopNode) else False args.append(ExprNodes.BoolNode(node.pos, value=inplace, constant_result=inplace)) if is_float or operator not in ('Eq', 'Ne'): # "PyFloatBinop" and "PyIntBinop" take an additional "check for zero division" argument. zerodivision_check = arg_order == 'CObj' and ( not node.cdivision if isinstance(node, ExprNodes.DivNode) else False) args.append(ExprNodes.BoolNode(node.pos, value=zerodivision_check, constant_result=zerodivision_check)) utility_code = TempitaUtilityCode.load_cached( "PyFloatBinop" if is_float else "PyIntCompare" if operator in ('Eq', 'Ne') else "PyIntBinop", "Optimize.c", context=dict(op=operator, order=arg_order, ret_type=ret_type)) call_node = self._substitute_method_call( node, function, "__Pyx_Py%s_%s%s%s" % ( 'Float' if is_float else 'Int', '' if ret_type.is_pyobject else 'Bool', operator, arg_order), self.Pyx_BinopInt_func_types[(num_type, ret_type)], '__%s__' % operator[:3].lower(), is_unbound_method, args, may_return_none=True, with_none_check=False, utility_code=utility_code) if node.type.is_pyobject and not ret_type.is_pyobject: call_node = ExprNodes.CoerceToPyTypeNode(call_node, self.current_env(), node.type) return call_node ### unicode type methods PyUnicode_uchar_predicate_func_type = PyrexTypes.CFuncType( PyrexTypes.c_bint_type, [ PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None), ]) def _inject_unicode_predicate(self, node, function, args, is_unbound_method): if is_unbound_method or len(args) != 1: return node ustring = args[0] if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \ not ustring.arg.type.is_unicode_char: return node uchar = ustring.arg method_name = function.attribute if method_name == 'istitle': # istitle() doesn't directly map to Py_UNICODE_ISTITLE() utility_code = UtilityCode.load_cached( "py_unicode_istitle", "StringTools.c") function_name = '__Pyx_Py_UNICODE_ISTITLE' else: utility_code = None function_name = 'Py_UNICODE_%s' % method_name.upper() func_call = self._substitute_method_call( node, function, function_name, self.PyUnicode_uchar_predicate_func_type, method_name, is_unbound_method, [uchar], utility_code = utility_code) if node.type.is_pyobject: func_call = func_call.coerce_to_pyobject(self.current_env) return func_call _handle_simple_method_unicode_isalnum = _inject_unicode_predicate _handle_simple_method_unicode_isalpha = _inject_unicode_predicate _handle_simple_method_unicode_isdecimal = _inject_unicode_predicate _handle_simple_method_unicode_isdigit = _inject_unicode_predicate _handle_simple_method_unicode_islower = _inject_unicode_predicate _handle_simple_method_unicode_isnumeric = _inject_unicode_predicate _handle_simple_method_unicode_isspace = _inject_unicode_predicate _handle_simple_method_unicode_istitle = _inject_unicode_predicate _handle_simple_method_unicode_isupper = _inject_unicode_predicate PyUnicode_uchar_conversion_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ucs4_type, [ PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None), ]) def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method): if is_unbound_method or len(args) != 1: return node ustring = args[0] if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \ not ustring.arg.type.is_unicode_char: return node uchar = ustring.arg method_name = function.attribute function_name = 'Py_UNICODE_TO%s' % method_name.upper() func_call = self._substitute_method_call( node, function, function_name, self.PyUnicode_uchar_conversion_func_type, method_name, is_unbound_method, [uchar]) if node.type.is_pyobject: func_call = func_call.coerce_to_pyobject(self.current_env) return func_call _handle_simple_method_unicode_lower = _inject_unicode_character_conversion _handle_simple_method_unicode_upper = _inject_unicode_character_conversion _handle_simple_method_unicode_title = _inject_unicode_character_conversion PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType( Builtin.list_type, [ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("keepends", PyrexTypes.c_bint_type, None), ]) def _handle_simple_method_unicode_splitlines(self, node, function, args, is_unbound_method): """Replace unicode.splitlines(...) by a direct call to the corresponding C-API function. """ if len(args) not in (1,2): self._error_wrong_arg_count('unicode.splitlines', node, args, "1 or 2") return node self._inject_bint_default_argument(node, args, 1, False) return self._substitute_method_call( node, function, "PyUnicode_Splitlines", self.PyUnicode_Splitlines_func_type, 'splitlines', is_unbound_method, args) PyUnicode_Split_func_type = PyrexTypes.CFuncType( Builtin.list_type, [ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("sep", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("maxsplit", PyrexTypes.c_py_ssize_t_type, None), ] ) def _handle_simple_method_unicode_split(self, node, function, args, is_unbound_method): """Replace unicode.split(...) by a direct call to the corresponding C-API function. """ if len(args) not in (1,2,3): self._error_wrong_arg_count('unicode.split', node, args, "1-3") return node if len(args) < 2: args.append(ExprNodes.NullNode(node.pos)) self._inject_int_default_argument( node, args, 2, PyrexTypes.c_py_ssize_t_type, "-1") return self._substitute_method_call( node, function, "PyUnicode_Split", self.PyUnicode_Split_func_type, 'split', is_unbound_method, args) PyUnicode_Join_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("seq", PyrexTypes.py_object_type, None), ]) def _handle_simple_method_unicode_join(self, node, function, args, is_unbound_method): """ unicode.join() builds a list first => see if we can do this more efficiently """ if len(args) != 2: self._error_wrong_arg_count('unicode.join', node, args, "2") return node if isinstance(args[1], ExprNodes.GeneratorExpressionNode): gen_expr_node = args[1] loop_node = gen_expr_node.loop yield_statements = _find_yield_statements(loop_node) if yield_statements: inlined_genexpr = ExprNodes.InlinedGeneratorExpressionNode( node.pos, gen_expr_node, orig_func='list', comprehension_type=Builtin.list_type) for yield_expression, yield_stat_node in yield_statements: append_node = ExprNodes.ComprehensionAppendNode( yield_expression.pos, expr=yield_expression, target=inlined_genexpr.target) Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node) args[1] = inlined_genexpr return self._substitute_method_call( node, function, "PyUnicode_Join", self.PyUnicode_Join_func_type, 'join', is_unbound_method, args) PyString_Tailmatch_func_type = PyrexTypes.CFuncType( PyrexTypes.c_bint_type, [ PyrexTypes.CFuncTypeArg("str", PyrexTypes.py_object_type, None), # bytes/str/unicode PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None), ], exception_value = '-1') def _handle_simple_method_unicode_endswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'unicode', 'endswith', unicode_tailmatch_utility_code, +1) def _handle_simple_method_unicode_startswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'unicode', 'startswith', unicode_tailmatch_utility_code, -1) def _inject_tailmatch(self, node, function, args, is_unbound_method, type_name, method_name, utility_code, direction): """Replace unicode.startswith(...) and unicode.endswith(...) by a direct call to the corresponding C-API function. """ if len(args) not in (2,3,4): self._error_wrong_arg_count('%s.%s' % (type_name, method_name), node, args, "2-4") return node self._inject_int_default_argument( node, args, 2, PyrexTypes.c_py_ssize_t_type, "0") self._inject_int_default_argument( node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX") args.append(ExprNodes.IntNode( node.pos, value=str(direction), type=PyrexTypes.c_int_type)) method_call = self._substitute_method_call( node, function, "__Pyx_Py%s_Tailmatch" % type_name.capitalize(), self.PyString_Tailmatch_func_type, method_name, is_unbound_method, args, utility_code = utility_code) return method_call.coerce_to(Builtin.bool_type, self.current_env()) PyUnicode_Find_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None), ], exception_value = '-2') def _handle_simple_method_unicode_find(self, node, function, args, is_unbound_method): return self._inject_unicode_find( node, function, args, is_unbound_method, 'find', +1) def _handle_simple_method_unicode_rfind(self, node, function, args, is_unbound_method): return self._inject_unicode_find( node, function, args, is_unbound_method, 'rfind', -1) def _inject_unicode_find(self, node, function, args, is_unbound_method, method_name, direction): """Replace unicode.find(...) and unicode.rfind(...) by a direct call to the corresponding C-API function. """ if len(args) not in (2,3,4): self._error_wrong_arg_count('unicode.%s' % method_name, node, args, "2-4") return node self._inject_int_default_argument( node, args, 2, PyrexTypes.c_py_ssize_t_type, "0") self._inject_int_default_argument( node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX") args.append(ExprNodes.IntNode( node.pos, value=str(direction), type=PyrexTypes.c_int_type)) method_call = self._substitute_method_call( node, function, "PyUnicode_Find", self.PyUnicode_Find_func_type, method_name, is_unbound_method, args) return method_call.coerce_to_pyobject(self.current_env()) PyUnicode_Count_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None), ], exception_value = '-1') def _handle_simple_method_unicode_count(self, node, function, args, is_unbound_method): """Replace unicode.count(...) by a direct call to the corresponding C-API function. """ if len(args) not in (2,3,4): self._error_wrong_arg_count('unicode.count', node, args, "2-4") return node self._inject_int_default_argument( node, args, 2, PyrexTypes.c_py_ssize_t_type, "0") self._inject_int_default_argument( node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX") method_call = self._substitute_method_call( node, function, "PyUnicode_Count", self.PyUnicode_Count_func_type, 'count', is_unbound_method, args) return method_call.coerce_to_pyobject(self.current_env()) PyUnicode_Replace_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("replstr", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("maxcount", PyrexTypes.c_py_ssize_t_type, None), ]) def _handle_simple_method_unicode_replace(self, node, function, args, is_unbound_method): """Replace unicode.replace(...) by a direct call to the corresponding C-API function. """ if len(args) not in (3,4): self._error_wrong_arg_count('unicode.replace', node, args, "3-4") return node self._inject_int_default_argument( node, args, 3, PyrexTypes.c_py_ssize_t_type, "-1") return self._substitute_method_call( node, function, "PyUnicode_Replace", self.PyUnicode_Replace_func_type, 'replace', is_unbound_method, args) PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType( Builtin.bytes_type, [ PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None), ]) PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType( Builtin.bytes_type, [ PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None), ]) _special_encodings = ['UTF8', 'UTF16', 'UTF-16LE', 'UTF-16BE', 'Latin1', 'ASCII', 'unicode_escape', 'raw_unicode_escape'] _special_codecs = [ (name, codecs.getencoder(name)) for name in _special_encodings ] def _handle_simple_method_unicode_encode(self, node, function, args, is_unbound_method): """Replace unicode.encode(...) by a direct C-API call to the corresponding codec. """ if len(args) < 1 or len(args) > 3: self._error_wrong_arg_count('unicode.encode', node, args, '1-3') return node string_node = args[0] if len(args) == 1: null_node = ExprNodes.NullNode(node.pos) return self._substitute_method_call( node, function, "PyUnicode_AsEncodedString", self.PyUnicode_AsEncodedString_func_type, 'encode', is_unbound_method, [string_node, null_node, null_node]) parameters = self._unpack_encoding_and_error_mode(node.pos, args) if parameters is None: return node encoding, encoding_node, error_handling, error_handling_node = parameters if encoding and isinstance(string_node, ExprNodes.UnicodeNode): # constant, so try to do the encoding at compile time try: value = string_node.value.encode(encoding, error_handling) except: # well, looks like we can't pass else: value = bytes_literal(value, encoding) return ExprNodes.BytesNode(string_node.pos, value=value, type=Builtin.bytes_type) if encoding and error_handling == 'strict': # try to find a specific encoder function codec_name = self._find_special_codec_name(encoding) if codec_name is not None and '-' not in codec_name: encode_function = "PyUnicode_As%sString" % codec_name return self._substitute_method_call( node, function, encode_function, self.PyUnicode_AsXyzString_func_type, 'encode', is_unbound_method, [string_node]) return self._substitute_method_call( node, function, "PyUnicode_AsEncodedString", self.PyUnicode_AsEncodedString_func_type, 'encode', is_unbound_method, [string_node, encoding_node, error_handling_node]) PyUnicode_DecodeXyz_func_ptr_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None), ])) _decode_c_string_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None), ]) _decode_bytes_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("string", PyrexTypes.py_object_type, None), PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None), ]) _decode_cpp_string_func_type = None # lazy init def _handle_simple_method_bytes_decode(self, node, function, args, is_unbound_method): """Replace char*.decode() by a direct C-API call to the corresponding codec, possibly resolving a slice on the char*. """ if not (1 <= len(args) <= 3): self._error_wrong_arg_count('bytes.decode', node, args, '1-3') return node # normalise input nodes string_node = args[0] start = stop = None if isinstance(string_node, ExprNodes.SliceIndexNode): index_node = string_node string_node = index_node.base start, stop = index_node.start, index_node.stop if not start or start.constant_result == 0: start = None if isinstance(string_node, ExprNodes.CoerceToPyTypeNode): string_node = string_node.arg string_type = string_node.type if string_type in (Builtin.bytes_type, Builtin.bytearray_type): if is_unbound_method: string_node = string_node.as_none_safe_node( "descriptor '%s' requires a '%s' object but received a 'NoneType'", format_args=['decode', string_type.name]) else: string_node = string_node.as_none_safe_node( "'NoneType' object has no attribute '%.30s'", error="PyExc_AttributeError", format_args=['decode']) elif not string_type.is_string and not string_type.is_cpp_string: # nothing to optimise here return node parameters = self._unpack_encoding_and_error_mode(node.pos, args) if parameters is None: return node encoding, encoding_node, error_handling, error_handling_node = parameters if not start: start = ExprNodes.IntNode(node.pos, value='0', constant_result=0) elif not start.type.is_int: start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) if stop and not stop.type.is_int: stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) # try to find a specific encoder function codec_name = None if encoding is not None: codec_name = self._find_special_codec_name(encoding) if codec_name is not None: if codec_name in ('UTF16', 'UTF-16LE', 'UTF-16BE'): codec_cname = "__Pyx_PyUnicode_Decode%s" % codec_name.replace('-', '') else: codec_cname = "PyUnicode_Decode%s" % codec_name decode_function = ExprNodes.RawCNameExprNode( node.pos, type=self.PyUnicode_DecodeXyz_func_ptr_type, cname=codec_cname) encoding_node = ExprNodes.NullNode(node.pos) else: decode_function = ExprNodes.NullNode(node.pos) # build the helper function call temps = [] if string_type.is_string: # C string if not stop: # use strlen() to find the string length, just as CPython would if not string_node.is_name: string_node = UtilNodes.LetRefNode(string_node) # used twice temps.append(string_node) stop = ExprNodes.PythonCapiCallNode( string_node.pos, "strlen", self.Pyx_strlen_func_type, args=[string_node], is_temp=False, utility_code=UtilityCode.load_cached("IncludeStringH", "StringTools.c"), ).coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env()) helper_func_type = self._decode_c_string_func_type utility_code_name = 'decode_c_string' elif string_type.is_cpp_string: # C++ std::string if not stop: stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX', constant_result=ExprNodes.not_a_constant) if self._decode_cpp_string_func_type is None: # lazy init to reuse the C++ string type self._decode_cpp_string_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("string", string_type, None), PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None), PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None), PyrexTypes.CFuncTypeArg("decode_func", self.PyUnicode_DecodeXyz_func_ptr_type, None), ]) helper_func_type = self._decode_cpp_string_func_type utility_code_name = 'decode_cpp_string' else: # Python bytes/bytearray object if not stop: stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX', constant_result=ExprNodes.not_a_constant) helper_func_type = self._decode_bytes_func_type if string_type is Builtin.bytes_type: utility_code_name = 'decode_bytes' else: utility_code_name = 'decode_bytearray' node = ExprNodes.PythonCapiCallNode( node.pos, '__Pyx_%s' % utility_code_name, helper_func_type, args=[string_node, start, stop, encoding_node, error_handling_node, decode_function], is_temp=node.is_temp, utility_code=UtilityCode.load_cached(utility_code_name, 'StringTools.c'), ) for temp in temps[::-1]: node = UtilNodes.EvalWithTempExprNode(temp, node) return node _handle_simple_method_bytearray_decode = _handle_simple_method_bytes_decode def _find_special_codec_name(self, encoding): try: requested_codec = codecs.getencoder(encoding) except LookupError: return None for name, codec in self._special_codecs: if codec == requested_codec: if '_' in name: name = ''.join([s.capitalize() for s in name.split('_')]) return name return None def _unpack_encoding_and_error_mode(self, pos, args): null_node = ExprNodes.NullNode(pos) if len(args) >= 2: encoding, encoding_node = self._unpack_string_and_cstring_node(args[1]) if encoding_node is None: return None else: encoding = None encoding_node = null_node if len(args) == 3: error_handling, error_handling_node = self._unpack_string_and_cstring_node(args[2]) if error_handling_node is None: return None if error_handling == 'strict': error_handling_node = null_node else: error_handling = 'strict' error_handling_node = null_node return (encoding, encoding_node, error_handling, error_handling_node) def _unpack_string_and_cstring_node(self, node): if isinstance(node, ExprNodes.CoerceToPyTypeNode): node = node.arg if isinstance(node, ExprNodes.UnicodeNode): encoding = node.value node = ExprNodes.BytesNode( node.pos, value=encoding.as_utf8_string(), type=PyrexTypes.c_const_char_ptr_type) elif isinstance(node, (ExprNodes.StringNode, ExprNodes.BytesNode)): encoding = node.value.decode('ISO-8859-1') node = ExprNodes.BytesNode( node.pos, value=node.value, type=PyrexTypes.c_const_char_ptr_type) elif node.type is Builtin.bytes_type: encoding = None node = node.coerce_to(PyrexTypes.c_const_char_ptr_type, self.current_env()) elif node.type.is_string: encoding = None else: encoding = node = None return encoding, node def _handle_simple_method_str_endswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'str', 'endswith', str_tailmatch_utility_code, +1) def _handle_simple_method_str_startswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'str', 'startswith', str_tailmatch_utility_code, -1) def _handle_simple_method_bytes_endswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'bytes', 'endswith', bytes_tailmatch_utility_code, +1) def _handle_simple_method_bytes_startswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'bytes', 'startswith', bytes_tailmatch_utility_code, -1) ''' # disabled for now, enable when we consider it worth it (see StringTools.c) def _handle_simple_method_bytearray_endswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'bytearray', 'endswith', bytes_tailmatch_utility_code, +1) def _handle_simple_method_bytearray_startswith(self, node, function, args, is_unbound_method): return self._inject_tailmatch( node, function, args, is_unbound_method, 'bytearray', 'startswith', bytes_tailmatch_utility_code, -1) ''' ### helpers def _substitute_method_call(self, node, function, name, func_type, attr_name, is_unbound_method, args=(), utility_code=None, is_temp=None, may_return_none=ExprNodes.PythonCapiCallNode.may_return_none, with_none_check=True): args = list(args) if with_none_check and args: args[0] = self._wrap_self_arg(args[0], function, is_unbound_method, attr_name) if is_temp is None: is_temp = node.is_temp return ExprNodes.PythonCapiCallNode( node.pos, name, func_type, args = args, is_temp = is_temp, utility_code = utility_code, may_return_none = may_return_none, result_is_used = node.result_is_used, ) def _wrap_self_arg(self, self_arg, function, is_unbound_method, attr_name): if self_arg.is_literal: return self_arg if is_unbound_method: self_arg = self_arg.as_none_safe_node( "descriptor '%s' requires a '%s' object but received a 'NoneType'", format_args=[attr_name, self_arg.type.name]) else: self_arg = self_arg.as_none_safe_node( "'NoneType' object has no attribute '%{0}s'".format('.30' if len(attr_name) <= 30 else ''), error="PyExc_AttributeError", format_args=[attr_name]) return self_arg def _inject_int_default_argument(self, node, args, arg_index, type, default_value): assert len(args) >= arg_index if len(args) == arg_index: args.append(ExprNodes.IntNode(node.pos, value=str(default_value), type=type, constant_result=default_value)) else: args[arg_index] = args[arg_index].coerce_to(type, self.current_env()) def _inject_bint_default_argument(self, node, args, arg_index, default_value): assert len(args) >= arg_index if len(args) == arg_index: default_value = bool(default_value) args.append(ExprNodes.BoolNode(node.pos, value=default_value, constant_result=default_value)) else: args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env()) unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c') bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c') str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c') class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): """Calculate the result of constant expressions to store it in ``expr_node.constant_result``, and replace trivial cases by their constant result. General rules: - We calculate float constants to make them available to the compiler, but we do not aggregate them into a single literal node to prevent any loss of precision. - We recursively calculate constants from non-literal nodes to make them available to the compiler, but we only aggregate literal nodes at each step. Non-literal nodes are never merged into a single node. """ def __init__(self, reevaluate=False): """ The reevaluate argument specifies whether constant values that were previously computed should be recomputed. """ super(ConstantFolding, self).__init__() self.reevaluate = reevaluate def _calculate_const(self, node): if (not self.reevaluate and node.constant_result is not ExprNodes.constant_value_not_set): return # make sure we always set the value not_a_constant = ExprNodes.not_a_constant node.constant_result = not_a_constant # check if all children are constant children = self.visitchildren(node) for child_result in children.values(): if type(child_result) is list: for child in child_result: if getattr(child, 'constant_result', not_a_constant) is not_a_constant: return elif getattr(child_result, 'constant_result', not_a_constant) is not_a_constant: return # now try to calculate the real constant value try: node.calculate_constant_result() # if node.constant_result is not ExprNodes.not_a_constant: # print node.__class__.__name__, node.constant_result except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError): # ignore all 'normal' errors here => no constant result pass except Exception: # this looks like a real error import traceback, sys traceback.print_exc(file=sys.stdout) NODE_TYPE_ORDER = [ExprNodes.BoolNode, ExprNodes.CharNode, ExprNodes.IntNode, ExprNodes.FloatNode] def _widest_node_class(self, *nodes): try: return self.NODE_TYPE_ORDER[ max(map(self.NODE_TYPE_ORDER.index, map(type, nodes)))] except ValueError: return None def _bool_node(self, node, value): value = bool(value) return ExprNodes.BoolNode(node.pos, value=value, constant_result=value) def visit_ExprNode(self, node): self._calculate_const(node) return node def visit_UnopNode(self, node): self._calculate_const(node) if not node.has_constant_result(): if node.operator == '!': return self._handle_NotNode(node) return node if not node.operand.is_literal: return node if node.operator == '!': return self._bool_node(node, node.constant_result) elif isinstance(node.operand, ExprNodes.BoolNode): return ExprNodes.IntNode(node.pos, value=str(int(node.constant_result)), type=PyrexTypes.c_int_type, constant_result=int(node.constant_result)) elif node.operator == '+': return self._handle_UnaryPlusNode(node) elif node.operator == '-': return self._handle_UnaryMinusNode(node) return node _negate_operator = { 'in': 'not_in', 'not_in': 'in', 'is': 'is_not', 'is_not': 'is' }.get def _handle_NotNode(self, node): operand = node.operand if isinstance(operand, ExprNodes.PrimaryCmpNode): operator = self._negate_operator(operand.operator) if operator: node = copy.copy(operand) node.operator = operator node = self.visit_PrimaryCmpNode(node) return node def _handle_UnaryMinusNode(self, node): def _negate(value): if value.startswith('-'): value = value[1:] else: value = '-' + value return value node_type = node.operand.type if isinstance(node.operand, ExprNodes.FloatNode): # this is a safe operation return ExprNodes.FloatNode(node.pos, value=_negate(node.operand.value), type=node_type, constant_result=node.constant_result) if node_type.is_int and node_type.signed or \ isinstance(node.operand, ExprNodes.IntNode) and node_type.is_pyobject: return ExprNodes.IntNode(node.pos, value=_negate(node.operand.value), type=node_type, longness=node.operand.longness, constant_result=node.constant_result) return node def _handle_UnaryPlusNode(self, node): if (node.operand.has_constant_result() and node.constant_result == node.operand.constant_result): return node.operand return node def visit_BoolBinopNode(self, node): self._calculate_const(node) if not node.operand1.has_constant_result(): return node if node.operand1.constant_result: if node.operator == 'and': return node.operand2 else: return node.operand1 else: if node.operator == 'and': return node.operand1 else: return node.operand2 def visit_BinopNode(self, node): self._calculate_const(node) if node.constant_result is ExprNodes.not_a_constant: return node if isinstance(node.constant_result, float): return node operand1, operand2 = node.operand1, node.operand2 if not operand1.is_literal or not operand2.is_literal: return node # now inject a new constant node with the calculated value try: type1, type2 = operand1.type, operand2.type if type1 is None or type2 is None: return node except AttributeError: return node if type1.is_numeric and type2.is_numeric: widest_type = PyrexTypes.widest_numeric_type(type1, type2) else: widest_type = PyrexTypes.py_object_type target_class = self._widest_node_class(operand1, operand2) if target_class is None: return node elif target_class is ExprNodes.BoolNode and node.operator in '+-//<<%**>>': # C arithmetic results in at least an int type target_class = ExprNodes.IntNode elif target_class is ExprNodes.CharNode and node.operator in '+-//<<%**>>&|^': # C arithmetic results in at least an int type target_class = ExprNodes.IntNode if target_class is ExprNodes.IntNode: unsigned = getattr(operand1, 'unsigned', '') and \ getattr(operand2, 'unsigned', '') longness = "LL"[:max(len(getattr(operand1, 'longness', '')), len(getattr(operand2, 'longness', '')))] new_node = ExprNodes.IntNode(pos=node.pos, unsigned=unsigned, longness=longness, value=str(int(node.constant_result)), constant_result=int(node.constant_result)) # IntNode is smart about the type it chooses, so we just # make sure we were not smarter this time if widest_type.is_pyobject or new_node.type.is_pyobject: new_node.type = PyrexTypes.py_object_type else: new_node.type = PyrexTypes.widest_numeric_type(widest_type, new_node.type) else: if target_class is ExprNodes.BoolNode: node_value = node.constant_result else: node_value = str(node.constant_result) new_node = target_class(pos=node.pos, type = widest_type, value = node_value, constant_result = node.constant_result) return new_node def visit_AddNode(self, node): self._calculate_const(node) if node.constant_result is ExprNodes.not_a_constant: return node if node.operand1.is_string_literal and node.operand2.is_string_literal: # some people combine string literals with a '+' str1, str2 = node.operand1, node.operand2 if isinstance(str1, ExprNodes.UnicodeNode) and isinstance(str2, ExprNodes.UnicodeNode): bytes_value = None if str1.bytes_value is not None and str2.bytes_value is not None: if str1.bytes_value.encoding == str2.bytes_value.encoding: bytes_value = bytes_literal( str1.bytes_value + str2.bytes_value, str1.bytes_value.encoding) string_value = EncodedString(node.constant_result) return ExprNodes.UnicodeNode( str1.pos, value=string_value, constant_result=node.constant_result, bytes_value=bytes_value) elif isinstance(str1, ExprNodes.BytesNode) and isinstance(str2, ExprNodes.BytesNode): if str1.value.encoding == str2.value.encoding: bytes_value = bytes_literal(node.constant_result, str1.value.encoding) return ExprNodes.BytesNode(str1.pos, value=bytes_value, constant_result=node.constant_result) # all other combinations are rather complicated # to get right in Py2/3: encodings, unicode escapes, ... return self.visit_BinopNode(node) def visit_MulNode(self, node): self._calculate_const(node) if node.operand1.is_sequence_constructor: return self._calculate_constant_seq(node, node.operand1, node.operand2) if isinstance(node.operand1, ExprNodes.IntNode) and \ node.operand2.is_sequence_constructor: return self._calculate_constant_seq(node, node.operand2, node.operand1) if node.operand1.is_string_literal: return self._multiply_string(node, node.operand1, node.operand2) elif node.operand2.is_string_literal: return self._multiply_string(node, node.operand2, node.operand1) return self.visit_BinopNode(node) def _multiply_string(self, node, string_node, multiplier_node): multiplier = multiplier_node.constant_result if not isinstance(multiplier, _py_int_types): return node if not (node.has_constant_result() and isinstance(node.constant_result, _py_string_types)): return node if len(node.constant_result) > 256: # Too long for static creation, leave it to runtime. (-> arbitrary limit) return node build_string = encoded_string if isinstance(string_node, ExprNodes.BytesNode): build_string = bytes_literal elif isinstance(string_node, ExprNodes.StringNode): if string_node.unicode_value is not None: string_node.unicode_value = encoded_string( string_node.unicode_value * multiplier, string_node.unicode_value.encoding) build_string = encoded_string if string_node.value.is_unicode else bytes_literal elif isinstance(string_node, ExprNodes.UnicodeNode): if string_node.bytes_value is not None: string_node.bytes_value = bytes_literal( string_node.bytes_value * multiplier, string_node.bytes_value.encoding) else: assert False, "unknown string node type: %s" % type(string_node) string_node.value = build_string( string_node.value * multiplier, string_node.value.encoding) # follow constant-folding and use unicode_value in preference if isinstance(string_node, ExprNodes.StringNode) and string_node.unicode_value is not None: string_node.constant_result = string_node.unicode_value else: string_node.constant_result = string_node.value return string_node def _calculate_constant_seq(self, node, sequence_node, factor): if factor.constant_result != 1 and sequence_node.args: if isinstance(factor.constant_result, _py_int_types) and factor.constant_result <= 0: del sequence_node.args[:] sequence_node.mult_factor = None elif sequence_node.mult_factor is not None: if (isinstance(factor.constant_result, _py_int_types) and isinstance(sequence_node.mult_factor.constant_result, _py_int_types)): value = sequence_node.mult_factor.constant_result * factor.constant_result sequence_node.mult_factor = ExprNodes.IntNode( sequence_node.mult_factor.pos, value=str(value), constant_result=value) else: # don't know if we can combine the factors, so don't return self.visit_BinopNode(node) else: sequence_node.mult_factor = factor return sequence_node def visit_ModNode(self, node): self.visitchildren(node) if isinstance(node.operand1, ExprNodes.UnicodeNode) and isinstance(node.operand2, ExprNodes.TupleNode): if not node.operand2.mult_factor: fstring = self._build_fstring(node.operand1.pos, node.operand1.value, node.operand2.args) if fstring is not None: return fstring return self.visit_BinopNode(node) _parse_string_format_regex = ( u'(%(?:' # %... u'(?:[-0-9]+|[ ])?' # width (optional) or space prefix fill character (optional) u'(?:[.][0-9]+)?' # precision (optional) u')?.)' # format type (or something different for unsupported formats) ) def _build_fstring(self, pos, ustring, format_args): # Issues formatting warnings instead of errors since we really only catch a few errors by accident. args = iter(format_args) substrings = [] can_be_optimised = True for s in re.split(self._parse_string_format_regex, ustring): if not s: continue if s == u'%%': substrings.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u'%'), constant_result=u'%')) continue if s[0] != u'%': if s[-1] == u'%': warning(pos, "Incomplete format: '...%s'" % s[-3:], level=1) can_be_optimised = False substrings.append(ExprNodes.UnicodeNode(pos, value=EncodedString(s), constant_result=s)) continue format_type = s[-1] try: arg = next(args) except StopIteration: warning(pos, "Too few arguments for format placeholders", level=1) can_be_optimised = False break if arg.is_starred: can_be_optimised = False break if format_type in u'asrfdoxX': format_spec = s[1:] conversion_char = None if format_type in u'doxX' and u'.' in format_spec: # Precision is not allowed for integers in format(), but ok in %-formatting. can_be_optimised = False elif format_type in u'ars': format_spec = format_spec[:-1] conversion_char = format_type if format_spec.startswith('0'): format_spec = '>' + format_spec[1:] # right-alignment '%05s' spells '{:>5}' elif format_type == u'd': # '%d' formatting supports float, but '{obj:d}' does not => convert to int first. conversion_char = 'd' if format_spec.startswith('-'): format_spec = '<' + format_spec[1:] # left-alignment '%-5s' spells '{:<5}' substrings.append(ExprNodes.FormattedValueNode( arg.pos, value=arg, conversion_char=conversion_char, format_spec=ExprNodes.UnicodeNode( pos, value=EncodedString(format_spec), constant_result=format_spec) if format_spec else None, )) else: # keep it simple for now ... can_be_optimised = False break if not can_be_optimised: # Print all warnings we can find before finally giving up here. return None try: next(args) except StopIteration: pass else: warning(pos, "Too many arguments for format placeholders", level=1) return None node = ExprNodes.JoinedStrNode(pos, values=substrings) return self.visit_JoinedStrNode(node) def visit_FormattedValueNode(self, node): self.visitchildren(node) conversion_char = node.conversion_char or 's' if isinstance(node.format_spec, ExprNodes.UnicodeNode) and not node.format_spec.value: node.format_spec = None if node.format_spec is None and isinstance(node.value, ExprNodes.IntNode): value = EncodedString(node.value.value) if value.isdigit(): return ExprNodes.UnicodeNode(node.value.pos, value=value, constant_result=value) if node.format_spec is None and conversion_char == 's': value = None if isinstance(node.value, ExprNodes.UnicodeNode): value = node.value.value elif isinstance(node.value, ExprNodes.StringNode): value = node.value.unicode_value if value is not None: return ExprNodes.UnicodeNode(node.value.pos, value=value, constant_result=value) return node def visit_JoinedStrNode(self, node): """ Clean up after the parser by discarding empty Unicode strings and merging substring sequences. Empty or single-value join lists are not uncommon because f-string format specs are always parsed into JoinedStrNodes. """ self.visitchildren(node) unicode_node = ExprNodes.UnicodeNode values = [] for is_unode_group, substrings in itertools.groupby(node.values, lambda v: isinstance(v, unicode_node)): if is_unode_group: substrings = list(substrings) unode = substrings[0] if len(substrings) > 1: value = EncodedString(u''.join(value.value for value in substrings)) unode = ExprNodes.UnicodeNode(unode.pos, value=value, constant_result=value) # ignore empty Unicode strings if unode.value: values.append(unode) else: values.extend(substrings) if not values: value = EncodedString('') node = ExprNodes.UnicodeNode(node.pos, value=value, constant_result=value) elif len(values) == 1: node = values[0] elif len(values) == 2: # reduce to string concatenation node = ExprNodes.binop_node(node.pos, '+', *values) else: node.values = values return node def visit_MergedDictNode(self, node): """Unpack **args in place if we can.""" self.visitchildren(node) args = [] items = [] def add(arg): if arg.is_dict_literal: if items: items[0].key_value_pairs.extend(arg.key_value_pairs) else: items.append(arg) elif isinstance(arg, ExprNodes.MergedDictNode): for child_arg in arg.keyword_args: add(child_arg) else: if items: args.append(items[0]) del items[:] args.append(arg) for arg in node.keyword_args: add(arg) if items: args.append(items[0]) if len(args) == 1: arg = args[0] if arg.is_dict_literal or isinstance(arg, ExprNodes.MergedDictNode): return arg node.keyword_args[:] = args self._calculate_const(node) return node def visit_MergedSequenceNode(self, node): """Unpack *args in place if we can.""" self.visitchildren(node) is_set = node.type is Builtin.set_type args = [] values = [] def add(arg): if (is_set and arg.is_set_literal) or (arg.is_sequence_constructor and not arg.mult_factor): if values: values[0].args.extend(arg.args) else: values.append(arg) elif isinstance(arg, ExprNodes.MergedSequenceNode): for child_arg in arg.args: add(child_arg) else: if values: args.append(values[0]) del values[:] args.append(arg) for arg in node.args: add(arg) if values: args.append(values[0]) if len(args) == 1: arg = args[0] if ((is_set and arg.is_set_literal) or (arg.is_sequence_constructor and arg.type is node.type) or isinstance(arg, ExprNodes.MergedSequenceNode)): return arg node.args[:] = args self._calculate_const(node) return node def visit_SequenceNode(self, node): """Unpack *args in place if we can.""" self.visitchildren(node) args = [] for arg in node.args: if not arg.is_starred: args.append(arg) elif arg.target.is_sequence_constructor and not arg.target.mult_factor: args.extend(arg.target.args) else: args.append(arg) node.args[:] = args self._calculate_const(node) return node def visit_PrimaryCmpNode(self, node): # calculate constant partial results in the comparison cascade self.visitchildren(node, ['operand1']) left_node = node.operand1 cmp_node = node while cmp_node is not None: self.visitchildren(cmp_node, ['operand2']) right_node = cmp_node.operand2 cmp_node.constant_result = not_a_constant if left_node.has_constant_result() and right_node.has_constant_result(): try: cmp_node.calculate_cascaded_constant_result(left_node.constant_result) except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError): pass # ignore all 'normal' errors here => no constant result left_node = right_node cmp_node = cmp_node.cascade if not node.cascade: if node.has_constant_result(): return self._bool_node(node, node.constant_result) return node # collect partial cascades: [[value, CmpNode...], [value, CmpNode, ...], ...] cascades = [[node.operand1]] final_false_result = [] def split_cascades(cmp_node): if cmp_node.has_constant_result(): if not cmp_node.constant_result: # False => short-circuit final_false_result.append(self._bool_node(cmp_node, False)) return else: # True => discard and start new cascade cascades.append([cmp_node.operand2]) else: # not constant => append to current cascade cascades[-1].append(cmp_node) if cmp_node.cascade: split_cascades(cmp_node.cascade) split_cascades(node) cmp_nodes = [] for cascade in cascades: if len(cascade) < 2: continue cmp_node = cascade[1] pcmp_node = ExprNodes.PrimaryCmpNode( cmp_node.pos, operand1=cascade[0], operator=cmp_node.operator, operand2=cmp_node.operand2, constant_result=not_a_constant) cmp_nodes.append(pcmp_node) last_cmp_node = pcmp_node for cmp_node in cascade[2:]: last_cmp_node.cascade = cmp_node last_cmp_node = cmp_node last_cmp_node.cascade = None if final_false_result: # last cascade was constant False cmp_nodes.append(final_false_result[0]) elif not cmp_nodes: # only constants, but no False result return self._bool_node(node, True) node = cmp_nodes[0] if len(cmp_nodes) == 1: if node.has_constant_result(): return self._bool_node(node, node.constant_result) else: for cmp_node in cmp_nodes[1:]: node = ExprNodes.BoolBinopNode( node.pos, operand1=node, operator='and', operand2=cmp_node, constant_result=not_a_constant) return node def visit_CondExprNode(self, node): self._calculate_const(node) if not node.test.has_constant_result(): return node if node.test.constant_result: return node.true_val else: return node.false_val def visit_IfStatNode(self, node): self.visitchildren(node) # eliminate dead code based on constant condition results if_clauses = [] for if_clause in node.if_clauses: condition = if_clause.condition if condition.has_constant_result(): if condition.constant_result: # always true => subsequent clauses can safely be dropped node.else_clause = if_clause.body break # else: false => drop clause else: # unknown result => normal runtime evaluation if_clauses.append(if_clause) if if_clauses: node.if_clauses = if_clauses return node elif node.else_clause: return node.else_clause else: return Nodes.StatListNode(node.pos, stats=[]) def visit_SliceIndexNode(self, node): self._calculate_const(node) # normalise start/stop values if node.start is None or node.start.constant_result is None: start = node.start = None else: start = node.start.constant_result if node.stop is None or node.stop.constant_result is None: stop = node.stop = None else: stop = node.stop.constant_result # cut down sliced constant sequences if node.constant_result is not not_a_constant: base = node.base if base.is_sequence_constructor and base.mult_factor is None: base.args = base.args[start:stop] return base elif base.is_string_literal: base = base.as_sliced_node(start, stop) if base is not None: return base return node def visit_ComprehensionNode(self, node): self.visitchildren(node) if isinstance(node.loop, Nodes.StatListNode) and not node.loop.stats: # loop was pruned already => transform into literal if node.type is Builtin.list_type: return ExprNodes.ListNode( node.pos, args=[], constant_result=[]) elif node.type is Builtin.set_type: return ExprNodes.SetNode( node.pos, args=[], constant_result=set()) elif node.type is Builtin.dict_type: return ExprNodes.DictNode( node.pos, key_value_pairs=[], constant_result={}) return node def visit_ForInStatNode(self, node): self.visitchildren(node) sequence = node.iterator.sequence if isinstance(sequence, ExprNodes.SequenceNode): if not sequence.args: if node.else_clause: return node.else_clause else: # don't break list comprehensions return Nodes.StatListNode(node.pos, stats=[]) # iterating over a list literal? => tuples are more efficient if isinstance(sequence, ExprNodes.ListNode): node.iterator.sequence = sequence.as_tuple() return node def visit_WhileStatNode(self, node): self.visitchildren(node) if node.condition and node.condition.has_constant_result(): if node.condition.constant_result: node.condition = None node.else_clause = None else: return node.else_clause return node def visit_ExprStatNode(self, node): self.visitchildren(node) if not isinstance(node.expr, ExprNodes.ExprNode): # ParallelRangeTransform does this ... return node # drop unused constant expressions if node.expr.has_constant_result(): return None return node # in the future, other nodes can have their own handler method here # that can replace them with a constant result node visit_Node = Visitor.VisitorTransform.recurse_to_children class FinalOptimizePhase(Visitor.EnvTransform, Visitor.NodeRefCleanupMixin): """ This visitor handles several commuting optimizations, and is run just before the C code generation phase. The optimizations currently implemented in this class are: - eliminate None assignment and refcounting for first assignment. - isinstance -> typecheck for cdef types - eliminate checks for None and/or types that became redundant after tree changes - eliminate useless string formatting steps - replace Python function calls that look like method calls by a faster PyMethodCallNode """ in_loop = False def visit_SingleAssignmentNode(self, node): """Avoid redundant initialisation of local variables before their first assignment. """ self.visitchildren(node) if node.first: lhs = node.lhs lhs.lhs_of_first_assignment = True return node def visit_SimpleCallNode(self, node): """ Replace generic calls to isinstance(x, type) by a more efficient type check. Replace likely Python method calls by a specialised PyMethodCallNode. """ self.visitchildren(node) function = node.function if function.type.is_cfunction and function.is_name: if function.name == 'isinstance' and len(node.args) == 2: type_arg = node.args[1] if type_arg.type.is_builtin_type and type_arg.type.name == 'type': cython_scope = self.context.cython_scope function.entry = cython_scope.lookup('PyObject_TypeCheck') function.type = function.entry.type PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type) node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr) elif (node.is_temp and function.type.is_pyobject and self.current_directives.get( "optimize.unpack_method_calls_in_pyinit" if not self.in_loop and self.current_env().is_module_scope else "optimize.unpack_method_calls")): # optimise simple Python methods calls if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not ( node.arg_tuple.mult_factor or (node.arg_tuple.is_literal and len(node.arg_tuple.args) > 1)): # simple call, now exclude calls to objects that are definitely not methods may_be_a_method = True if function.type is Builtin.type_type: may_be_a_method = False elif function.is_attribute: if function.entry and function.entry.type.is_cfunction: # optimised builtin method may_be_a_method = False elif function.is_name: entry = function.entry if entry.is_builtin or entry.type.is_cfunction: may_be_a_method = False elif entry.cf_assignments: # local functions/classes are definitely not methods non_method_nodes = (ExprNodes.PyCFunctionNode, ExprNodes.ClassNode, ExprNodes.Py3ClassNode) may_be_a_method = any( assignment.rhs and not isinstance(assignment.rhs, non_method_nodes) for assignment in entry.cf_assignments) if may_be_a_method: if (node.self and function.is_attribute and isinstance(function.obj, ExprNodes.CloneNode) and function.obj.arg is node.self): # function self object was moved into a CloneNode => undo function.obj = function.obj.arg node = self.replace(node, ExprNodes.PyMethodCallNode.from_node( node, function=function, arg_tuple=node.arg_tuple, type=node.type)) return node def visit_NumPyMethodCallNode(self, node): # Exclude from replacement above. self.visitchildren(node) return node def visit_PyTypeTestNode(self, node): """Remove tests for alternatively allowed None values from type tests when we know that the argument cannot be None anyway. """ self.visitchildren(node) if not node.notnone: if not node.arg.may_be_none(): node.notnone = True return node def visit_NoneCheckNode(self, node): """Remove None checks from expressions that definitely do not carry a None value. """ self.visitchildren(node) if not node.arg.may_be_none(): return node.arg return node def visit_LoopNode(self, node): """Remember when we enter a loop as some expensive optimisations might still be worth it there. """ old_val = self.in_loop self.in_loop = True self.visitchildren(node) self.in_loop = old_val return node class ConsolidateOverflowCheck(Visitor.CythonTransform): """ This class facilitates the sharing of overflow checking among all nodes of a nested arithmetic expression. For example, given the expression a*b + c, where a, b, and x are all possibly overflowing ints, the entire sequence will be evaluated and the overflow bit checked only at the end. """ overflow_bit_node = None def visit_Node(self, node): if self.overflow_bit_node is not None: saved = self.overflow_bit_node self.overflow_bit_node = None self.visitchildren(node) self.overflow_bit_node = saved else: self.visitchildren(node) return node def visit_NumBinopNode(self, node): if node.overflow_check and node.overflow_fold: top_level_overflow = self.overflow_bit_node is None if top_level_overflow: self.overflow_bit_node = node else: node.overflow_bit_node = self.overflow_bit_node node.overflow_check = False self.visitchildren(node) if top_level_overflow: self.overflow_bit_node = None else: self.visitchildren(node) return node