diff --git a/programming_examples/basic/dma_transpose/aie2.py b/programming_examples/basic/dma_transpose/aie2.py
index d4e5f21793..48f2b443df 100644
--- a/programming_examples/basic/dma_transpose/aie2.py
+++ b/programming_examples/basic/dma_transpose/aie2.py
@@ -34,8 +34,8 @@
 # TODO: clean up placement
 # TODO: logic to put dummy core if link has core location but core not specified
-worker_program = MyWorker(None, [], coords=(0, 2))
-my_link = MyObjectFifoLink([of_in.second], [of_out.first], coords=(0, 2))
+worker_program = MyWorker(None, [], coords=(0, 2), intermediate=AnyMemtile)
+# my_link = MyObjectFifoLink([of_in.second], [of_out.first], coords=(0, 2))
 
 # TODO: take memref_type for input/output instead?
 inout_program = SimpleFifoInOutProgram(
diff --git a/programming_examples/basic/matrix_multiplication/single_core/aie2.py b/programming_examples/basic/matrix_multiplication/single_core/aie2.py
index a4b0e3fa89..771b5ab9a9 100644
--- a/programming_examples/basic/matrix_multiplication/single_core/aie2.py
+++ b/programming_examples/basic/matrix_multiplication/single_core/aie2.py
@@ -243,8 +243,15 @@ def sequence_fn(A, B, C, inA, inB, outC):
         [inA.first, inB.first, outC.second],
         coords=(0, 0),
     )
+
+    # AnyMemtile
+    c = LogicalCore()
+    c2 = c.neighbor()
+
     worker_program = MyWorker(
-        core_fn, [memA.second, memB.second, memC.first, zero, matmul], coords=(0, 2)
+        core_fn,
+        [memA.second, memB.second, memC.first, zero, matmul],
+        AnyCore,  # coords=(0, 2)
     )
 
     my_program = MyProgram(
@@ -252,7 +259,11 @@ def sequence_fn(A, B, C, inA, inB, outC):
         worker_programs=[worker_program],
         links=[inALink, inBLink, outCLink],
         inout_program=inout_program,
+        placer=SequentialPlace(),  # GraphBasedPlacer() # CoreOnlyPlace() -> anything memtile has to be decided by Programmer
     )
+
+    # g = my_program.get_dataflow_graph()
+
     my_program.resolve_program()
diff --git a/programming_examples/basic/passthrough_kernel/aie2.py b/programming_examples/basic/passthrough_kernel/aie2.py
index 1a219ee097..05e4fa0ab4 100644
--- a/programming_examples/basic/passthrough_kernel/aie2.py
+++ b/programming_examples/basic/passthrough_kernel/aie2.py
@@ -64,3 +64,48 @@ def core_fn(of_in, of_out, passThroughLine):
     NPU1Col1(), worker_programs=[worker_program], inout_program=inout_program
 )
 my_program.resolve_program()
+
+"""
+Brainstorming on FifoDepth:
+
+Depth:
+  * MyObjectFifo(n, line_type)
+    * Expect strictly == or < n for actual depth
+  * MyObjectFifo([n, m], line_type)
+    * object_fifo(prod, [cons1, cons2], elem_depth=[prod_depth, cons_depth1, cons_depth2])
+    * We have examples where sometimes cons1 is close and cons2 is far (skip connection - have as far as 7). If we gave a single depth of 7 -- too much.
+    * Need to choose which depths to force/not_force
+
+  * MyObjectFifo(line_type)
+    * Expect highest some n+1 ??
+
+  * MyObjectFifo(n, line_type, force_depth=True)
+    * ALWAYS n
+
+
+m = MyObjectFifo()
+m.first.enforce_depth()
+
+Can write a number or an array. A number is treated flexibly; an array is treated strictly.
+Force may lead to deadlock.
+
+Not clean case:
+* How to enforce depths with broadcast when not shared mem? Created
+
+Depth (Cases):
+* object_fifo(prod, [cons1, cons2], elem_depth=[prod_depth, cons_depth1, cons_depth2])
+
+
+* object_fifo(prod, [cons1, cons2], elem_depth=[(prod_depth, is_force), (cons_depth1, is_force), (cons_depth2, is_force)])
+* object_fifo(prod, [cons1, cons2], forced_elem_depth=[])
+
+* object_fifo(prod, cons, elem_depth=[prod_depth, cons_depth])
+  * if prod_depth != cons_depth -> cannot use shared? Or just discard one of the depths?
+
+Maybe (I don't like this):
+* object_fifo(prod, cons, depth?)
+* object_fifo((prod, depth), (cons, depth))
+
+TODO: ObjectFifo Skip Connection Analysis, adjusts depth for you based on that.
+TODO: Maybe fusing kernels?
+"""
diff --git a/python/api/dataflow/inout/simplefifoinout.py b/python/api/dataflow/inout/simplefifoinout.py
index 1f6ae01a44..494a69178b 100644
--- a/python/api/dataflow/inout/simplefifoinout.py
+++ b/python/api/dataflow/inout/simplefifoinout.py
@@ -28,8 +28,8 @@ def __init__(
         out_strides: Optional[list[int]] = None,
         dtype: np.generic = np.uint8,
     ):
-        assert bytes_in % np.prod(fifo_in.obj_type[0]) == 0
-        assert bytes_out % np.prod(fifo_out.obj_type[0]) == 0
+        assert bytes_in % np.prod(fifo_in.obj_type.shape) == 0
+        assert bytes_out % np.prod(fifo_out.obj_type.shape) == 0
         assert bytes_in > 0
         assert bytes_out > 0
@@ -91,8 +91,8 @@ def resolve(
         loc: ir.Location = None,
         ip: ir.InsertionPoint = None,
     ) -> None:
-        tensor_in_ty = MyTensorType(self.bytes_in, self.dtype).memref_type
-        tensor_out_ty = MyTensorType(self.bytes_out, self.dtype).memref_type
+        tensor_in_ty = MyTensorType(self.dtype, [self.bytes_in]).memref_type
+        tensor_out_ty = MyTensorType(self.dtype, [self.bytes_out]).memref_type
 
         @runtime_sequence(tensor_in_ty, tensor_out_ty)
         def sequence(inTensor, outTensor):
diff --git a/python/api/dataflow/objectfifo.py b/python/api/dataflow/objectfifo.py
index a8a3fa67a5..095d53dd29 100644
--- a/python/api/dataflow/objectfifo.py
+++ b/python/api/dataflow/objectfifo.py
@@ -37,8 +37,8 @@ def __init__(
         name: str = None,
         end1: MyObjectFifoEndpoint = None,
         end2: MyObjectFifoEndpoint = None,
-        dimensionsToStream=list[list[int]],  # TODO(erika): needs a type
-        dimensionsFromStreamPerConsumer=list[list[int]],  # TODO(erika): needs a type
+        dimensionsToStream=None,  # TODO(erika): needs a type
+        dimensionsFromStreamPerConsumer=None,  # TODO(erika): needs a type
     ):
         self.__depth = depth
         self.__obj_type = obj_type
@@ -86,7 +86,7 @@ def resolve(
         if self.__op == None:
             assert self.end1 != None, "ObjectFifo missing endpoint 1"
             assert self.end2 != None, "ObjectFifo missing endpoint 2"
-            assert self.__memref_type != None, "ObjectFifo missing memref_type"
+            assert self.__obj_type != None, "ObjectFifo missing object type"
             self.__op = object_fifo(
                 self.name,
                 self.end1.get_tile().op,
diff --git a/python/api/kernels/binkernel.py b/python/api/kernels/binkernel.py
index 4fead7927c..d55193dc93 100644
--- a/python/api/kernels/binkernel.py
+++ b/python/api/kernels/binkernel.py
@@ -38,15 +38,10 @@ def resolve(
         if self.__op == None:
             resolved_inout_types = []
             for t in self.__inout_types:
-                try:
+                if isinstance(t, MyTensorType):
+                    dtype = t.memref_type
+                else:
                     dtype = np_dtype_to_mlir_type(t)
-                except Exception:
-                    dtype = get_arg_types(t)
-                    if dtype is None:
-                        # Interpret as a dummy memref
-                        dtype = MemRefType.get(
-                            shape=t[0], element_type=np_dtype_to_mlir_type(t[1])
-                        )
                 resolved_inout_types.append(dtype)
 
             self.__op = external_func(self.__name, inputs=resolved_inout_types)
diff --git a/python/api/tensor.py b/python/api/tensor.py
index 2764c35125..8d3bc3fb90 100644
--- a/python/api/tensor.py
+++ b/python/api/tensor.py
@@ -28,4 +28,6 @@ def dtype(self) -> np.generic:
 
     def __eq__(self, other):
         # TODO: may want to be equal to numpy datatypes as well??
-        return self.__my_numpy_type == other.__my_numpy_type
+        if other:
+            return self.__my_numpy_type == other.__my_numpy_type
+        return False
diff --git a/python/api/worker.py b/python/api/worker.py
index d357635460..7e574e8f04 100644
--- a/python/api/worker.py
+++ b/python/api/worker.py
@@ -20,7 +20,7 @@ class MyWorker(MyObjectFifoEndpoint):
     def __init__(
         self,
-        core_fn: Optional[Callable[[Union[ObjectFifoHandle, MyKernel], None]]],
+        core_fn: Optional[Callable[[Union[ObjectFifoHandle, MyKernel]], None]],
        fn_args: list[Union[ObjectFifoHandle, MyKernel]] = [],
         coords: tuple[int, int] = None,
     ):
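
A rough illustration of the elem_depth idea from the FifoDepth brainstorming note above: a bare number is a flexible hint, a per-endpoint list is treated strictly, and each endpoint can optionally force its depth. EndpointDepth, DepthSpec, and normalize_depth are hypothetical names used only for this sketch; they are not part of this diff or of the existing API.

# Hypothetical sketch only; not part of the diff above.
from dataclasses import dataclass
from typing import Union


@dataclass
class EndpointDepth:
    depth: int           # number of buffer elements requested at this endpoint
    force: bool = False  # True: depth is fixed; False: depth is only a hint


# "A number is treated flexibly; an array is treated strictly."
DepthSpec = Union[int, list[EndpointDepth]]


def normalize_depth(spec: DepthSpec, num_endpoints: int) -> list[EndpointDepth]:
    # Expand a depth spec to one entry per endpoint (producer first, then consumers).
    if isinstance(spec, int):
        # A bare number is a flexible hint applied to every endpoint.
        return [EndpointDepth(spec, force=False) for _ in range(num_endpoints)]
    assert len(spec) == num_endpoints, "per-endpoint depth list must match endpoint count"
    return list(spec)


# Skip-connection case from the note: the near consumer keeps depth 2, the far
# consumer needs depth 7, and only the far consumer's depth is forced.
depths = normalize_depth(
    [EndpointDepth(2), EndpointDepth(2), EndpointDepth(7, force=True)], 3
)

Whether force lives on each endpoint, in a (depth, is_force) tuple, or in a separate forced_elem_depth argument is exactly the open question the brainstorming note is weighing.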