from pytensor.graph.basic import Constant
from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter
from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.rewriting.basic import register_specialize, register_stabilize
from pytensor.tensor.signal import convolve1d
from pytensor.tensor.signal.conv import Convolve1d
from pytensor.tensor.subtensor import Subtensor, indices_from_subtensor


@register_stabilize
@register_specialize
@node_rewriter([Subtensor])
def local_sliced_full_conv_to_valid_conv(fgraph, node):
    """Rewrite a sliced full convolution that is equivalent to a valid one.

    The gradient of a valid Conv1d always implements the worst-case scenario
    (a full convolution), because it would need to know which input is larger
    to do something smarter. If we can find out (through rewrites or static
    shapes), we provide the direct implementation, which can be orders of
    magnitude faster.

    # if x.shape[-1] > y.shape[-1]
    # z = convolve1d(x, y, mode="full")
    # z[..., y.shape[-1] - 1 : z.shape[-1] - y.shape[-1] + 1] -> convolve1d(x, y, mode="valid")
    """
    conv, *other_idx_vars = node.inputs

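    # Only rewrite slices applied directly to a Blockwise-wrapped full Convolve1d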
    if not (
        conv.owner is not None
        and isinstance(conv.owner.op, Blockwise)
        and isinstance(conv.owner.op.core_op, Convolve1d)
        and conv.owner.op.core_op.mode == "full"
    ):
        return None

    # Check that we have an (a:b) constant slice on the last axis of the input
    idx_list = node.op.idx_list
    if not (len(idx_list) == conv.type.ndim and isinstance(idx_list[-1], slice)):
        return None

    last_slice = idx_list[-1]
    if not (
        last_slice.start is not None
        and last_slice.stop is not None
        and last_slice.step is None
    ):
        return None

    *other_idx_vars, start, stop = other_idx_vars
    if not (isinstance(start, Constant) and isinstance(stop, Constant)):
        return None

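    # Static shapes are required to decide at rewrite time whether the slice
    # matches the valid region of the full convolution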
    x, y = conv.owner.inputs
    len_x = x.type.shape[-1]
    len_y = y.type.shape[-1]
    if len_x is None or len_y is None:
        return None

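    # Extract Python ints from the constant slice bounds and make x the longer input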
    start, stop = start.data, stop.data
    if len_x < len_y:
        # Convolution is commutative, so we can swap the inputs
        x, y = y, x
        len_x, len_y = len_y, len_x

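    # The slice [len_y - 1 : len_x] of the full convolution is exactly the valid convolution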
    if (
        start == len_y - 1
        # equivalent to stop = conv.shape[-1] - len_y + 1
        and stop == start + (len_x - len_y) + 1
    ):
        new_conv = convolve1d(x, y, mode="valid")
        copy_stack_trace(conv, new_conv)

        if other_idx_vars:
            # Reapply any non-trivial indices on the remaining axes
            new_indices = indices_from_subtensor(idx_list[:-1], other_idx_vars)
            new_conv = new_conv[new_indices]
            copy_stack_trace(node.out, new_conv)

        return [new_conv]
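

# A minimal usage sketch (illustrative only, not part of the rewrite itself;
# the shapes below are made-up assumptions). With static shapes, slicing the
# full convolution at [len_y - 1 : len_x] should be replaced by a valid
# convolution when the function is compiled:
#
#     import pytensor
#     import pytensor.tensor as pt
#     from pytensor.tensor.signal import convolve1d
#
#     x = pt.tensor("x", shape=(100,))
#     y = pt.tensor("y", shape=(5,))
#     z = convolve1d(x, y, mode="full")[4:100]  # start = 5 - 1, stop = 100
#     fn = pytensor.function([x, y], z)  # compiled graph should use mode="valid"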