From c0a4daac7ac45607cca434690c77d99c4c70681d Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Tue, 5 Mar 2024 12:40:40 +0100 Subject: [PATCH] Cleanup wrappers for Object FIFO python bindings (#1091) --- python/dialects/aie.py | 78 ++-- .../ipu-xrt/add_one_objFifo/aie2.py | 26 +- .../ipu-xrt/log_hello_world/hello_world.py | 27 +- .../ipu-xrt/matrix_multiplication/aie2.py | 59 ++- .../matrix_multiplication_array/aie2.py | 133 +++---- .../matrix_multiplication_column/aie2.py | 87 ++--- .../matrix_vector_multiplication/aie2.py | 104 ++--- .../ipu-xrt/passthrough_hardware/aie2.py | 7 +- .../ipu-xrt/vector_scalar/aie2.py | 23 +- .../color_detect/aie2_colorDetect.py | 234 +++--------- .../color_threshold/aie2_colorThreshold.py | 138 +++---- .../edge_detect/aie2_edgeDetect.py | 179 ++++----- .../vision_pipelines/passthrough/aie2.py | 21 +- test/python/aie_ops.py | 102 ++--- test/python/code_region.py | 39 +- test/python/core_ext_kernel.py | 39 +- test/python/ipu.py | 354 +++++------------- test/python/objFifo.py | 21 +- test/python/objFifo_link.py | 30 +- test/python/simple_with_bindings.py | 4 +- 20 files changed, 593 insertions(+), 1112 deletions(-) diff --git a/python/dialects/aie.py b/python/dialects/aie.py index 25926d2d48..2d69e4fa3e 100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -74,7 +74,7 @@ def external_func(name, inputs, outputs=None, visibility="private"): # Wrapper for func CallOp. -class Call(CallOp): +class call(CallOp): """Specialize CallOp class constructor to take python integers""" def __init__(self, calleeOrResults, inputs=[], input_types=[]): @@ -199,66 +199,80 @@ def __init__(self, shape, datatype, name=None, loc=None, ip=None): # Create an aie objectFifo between specified tiles, with given depth and memref datatype. # depth examples: 2, [2,2,7] -class OrderedObjectBuffer(ObjectFifoCreateOp): +class object_fifo(ObjectFifoCreateOp): def __init__( self, name, - tile0, - tile1, + producerTile, + consumerTiles, depth, datatype, dimensionsToStream=None, dimensionsFromStreamPerConsumer=None, ): + self.datatype = datatype + if not isinstance(consumerTiles, List): + consumerTiles = [consumerTiles] if dimensionsFromStreamPerConsumer is None: dimensionsFromStreamPerConsumer = [] if dimensionsToStream is None: dimensionsToStream = [] int_ty = IntegerType.get_signless(32) - if isinstance(depth, int): - int_depth = IntegerAttr.get(int_ty, depth) - else: - int_depths = [] - for d in depth: - int_depths.append(IntegerAttr.get(int_ty, d)) - int_depth = ArrayAttr.get(int_depths) - of_Ty = ObjectFifoType.get(datatype) + of_Ty = TypeAttr.get(ObjectFifoType.get(datatype)) super().__init__( sym_name=name, - producerTile=tile0, - consumerTiles=tile1, - elemNumber=int_depth, - elem_type=TypeAttr.get(of_Ty), + producerTile=producerTile, + consumerTiles=consumerTiles, + elemNumber=depth, + elemType=of_Ty, dimensionsToStream=dimensionsToStream, dimensionsFromStreamPerConsumer=dimensionsFromStreamPerConsumer, ) + def acquire(self, port, num_elem): + subview_t = ObjectFifoSubviewType.get(self.datatype) + acq = ObjectFifoAcquireOp(subview_t, port, self.sym_name.value, num_elem) -# Create an aie objectFifo acquire op of given number of elements with given memref datatype, -# from objFifo with given name. -class ObjectFifoAcquireOp(ObjectFifoAcquireOp): - def __init__(self, port, of_name, num_elem, datatype): - subview_t = ObjectFifoSubviewType.get(datatype) - self.datatype = datatype - super().__init__(subview_t, port, of_name, num_elem) - - def acquired_elem(self): objects = [] - if self.size.value == 1: + if acq.size.value == 1: return ObjectFifoSubviewAccessOp( - self.datatype, self.subview, self.size.value - 1 + self.datatype, acq.subview, acq.size.value - 1 ) - for i in range(self.size.value): - objects.append(ObjectFifoSubviewAccessOp(self.datatype, self.subview, i)) + for i in range(acq.size.value): + objects.append(ObjectFifoSubviewAccessOp(self.datatype, acq.subview, i)) return objects + def release(self, port, num_elem): + return objectfifo_release(port, self.sym_name.value, num_elem) -def acquire(port, of_name, num_elem, datatype): - return ObjectFifoAcquireOp(port, of_name, num_elem, datatype) + +# Create an aie objectFifo_link between input and output objectFifos. +class object_fifo_link(ObjectFifoLinkOp): + """Specialize ObjectFifoLinkOp class constructor to take python variables""" + + def __init__( + self, + fifoIns, + fifoOuts, + ): + if not isinstance(fifoIns, List): + fifoIns = [fifoIns] + if not isinstance(fifoOuts, List): + fifoOuts = [fifoOuts] + fifoInRefs = map( + lambda i: i if isinstance(i, str) else i.sym_name.value, fifoIns + ) + fifoOutRefs = map( + lambda i: i if isinstance(i, str) else i.sym_name.value, fifoOuts + ) + super().__init__( + fifoIns=fifoInRefs, + fifoOuts=fifoOutRefs, + ) # Create a packet flow between source and destination tile ports. -class PacketFlow(PacketFlowOp): +class packetflow(PacketFlowOp): """Specialize PacketFlowOp class constructor to take python integers""" def __init__( diff --git a/reference_designs/ipu-xrt/add_one_objFifo/aie2.py b/reference_designs/ipu-xrt/add_one_objFifo/aie2.py index 9c23934c15..dc6d37ff96 100644 --- a/reference_designs/ipu-xrt/add_one_objFifo/aie2.py +++ b/reference_designs/ipu-xrt/add_one_objFifo/aie2.py @@ -19,8 +19,6 @@ def my_add_one_objFifo(): def device_body(): memRef_16_ty = T.memref(16, T.i32()) memRef_8_ty = T.memref(8, T.i32()) - ofifo_memRef_16_ty = TypeAttr.get(ObjectFifoType.get(memRef_16_ty)) - ofifo_memRef_8_ty = TypeAttr.get(ObjectFifoType.get(memRef_8_ty)) # Tile declarations ShimTile = tile(0, 0) @@ -29,14 +27,14 @@ def device_body(): # AIE-array data movement with object fifos # Input - objectfifo("in0", ShimTile, [MemTile], 2, ofifo_memRef_16_ty, [], []) - objectfifo("in1", MemTile, [ComputeTile2], 2, ofifo_memRef_8_ty, [], []) - objectfifo_link(["in0"], ["in1"]) + of_in0 = object_fifo("in0", ShimTile, MemTile, 2, memRef_16_ty) + of_in1 = object_fifo("in1", MemTile, ComputeTile2, 2, memRef_8_ty) + object_fifo_link(of_in0, of_in1) # Output - objectfifo("out0", MemTile, [ShimTile], 2, ofifo_memRef_16_ty, [], []) - objectfifo("out1", ComputeTile2, [MemTile], 2, ofifo_memRef_8_ty, [], []) - objectfifo_link(["out1"], ["out0"]) + of_out0 = object_fifo("out0", MemTile, ShimTile, 2, memRef_16_ty) + of_out1 = object_fifo("out1", ComputeTile2, MemTile, 2, memRef_8_ty) + object_fifo_link(of_out1, of_out0) # Set up compute tiles @@ -45,19 +43,15 @@ def device_body(): def core_body(): # Effective while(1) for _ in for_(8): - elem_in = acquire( - ObjectFifoPort.Consume, "in1", 1, memRef_8_ty - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "out1", 1, memRef_8_ty - ).acquired_elem() + elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1) + elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1) for i in for_(8): v0 = memref.load(elem_in, [i]) v1 = arith.addi(v0, arith.constant(1, T.i32())) memref.store(v1, elem_out, [i]) yield_([]) - objectfifo_release(ObjectFifoPort.Consume, "in1", 1) - objectfifo_release(ObjectFifoPort.Produce, "out1", 1) + of_in1.release(ObjectFifoPort.Consume, 1) + of_out1.release(ObjectFifoPort.Produce, 1) yield_([]) # To/from AIE-array data movement diff --git a/reference_designs/ipu-xrt/log_hello_world/hello_world.py b/reference_designs/ipu-xrt/log_hello_world/hello_world.py index 729feba511..b017d110b7 100644 --- a/reference_designs/ipu-xrt/log_hello_world/hello_world.py +++ b/reference_designs/ipu-xrt/log_hello_world/hello_world.py @@ -18,7 +18,6 @@ def printf(): @device(AIEDevice.ipu) def device_body(): memRef_ty = T.memref(N, T.i32()) - ofifo_memRef_ty = TypeAttr.get(ObjectFifoType.get(memRef_ty)) # AIE Core Function declarations kernel = external_func("kernel", inputs=[memRef_ty, memRef_ty, memRef_ty]) @@ -28,28 +27,22 @@ def device_body(): ComputeTile2 = tile(0, 2) # AIE-array data movement with object fifos - objectfifo("inOF", ShimTile, [ComputeTile2], 2, ofifo_memRef_ty, [], []) - objectfifo("outOF", ComputeTile2, [ShimTile], 2, ofifo_memRef_ty, [], []) - objectfifo("logoutOF", ComputeTile2, [ShimTile], 2, ofifo_memRef_ty, [], []) + inOF = object_fifo("inOF", ShimTile, ComputeTile2, 2, memRef_ty) + outOF = object_fifo("outOF", ComputeTile2, ShimTile, 2, memRef_ty) + logoutOF = object_fifo("logoutOF", ComputeTile2, ShimTile, 2, memRef_ty) # Set up compute tiles # Compute tile 2 @core(ComputeTile2, "kernel.o") def core_body(): - elemOut = acquire( - ObjectFifoPort.Produce, "outOF", 1, memRef_ty - ).acquired_elem() - elemIn = acquire( - ObjectFifoPort.Consume, "inOF", 1, memRef_ty - ).acquired_elem() - elemLogout = acquire( - ObjectFifoPort.Produce, "logoutOF", 1, memRef_ty - ).acquired_elem() - Call(kernel, [elemIn, elemOut, elemLogout]) - objectfifo_release(ObjectFifoPort.Consume, "inOF", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOF", 1) - objectfifo_release(ObjectFifoPort.Produce, "logoutOF", 1) + elemOut = outOF.acquire(ObjectFifoPort.Produce, 1) + elemIn = inOF.acquire(ObjectFifoPort.Consume, 1) + elemLogout = logoutOF.acquire(ObjectFifoPort.Produce, 1) + call(kernel, [elemIn, elemOut, elemLogout]) + inOF.release(ObjectFifoPort.Consume, 1) + outOF.release(ObjectFifoPort.Produce, 1) + logoutOF.release(ObjectFifoPort.Produce, 1) # To/from AIE-array data movement @FuncOp.from_py_func(memRef_ty, memRef_ty, memRef_ty) diff --git a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py index 8521a83386..5e9dbeab9c 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py @@ -83,58 +83,55 @@ def device_body(): # AIE-array data movement with object fifos # Input A - objectfifo("inA", shim_tile, [mem_tile], 2, ofifo_memref_a_ty, [], []) - objectfifo( + inA = object_fifo("inA", shim_tile, mem_tile, 2, memref_a_ty) + memA = object_fifo( "memA", mem_tile, - [compute_tile2], + compute_tile2, 2, - ofifo_memref_a_ty, + memref_a_ty, [ (m // r, r * k * word_size_in // 4), (k // s, s * word_size_in // 4), (r, k * word_size_in // 4), (s * word_size_in // 4, 1), ], - [], ) - objectfifo_link(["inA"], ["memA"]) + object_fifo_link(inA, memA) # Input B - objectfifo("inB", shim_tile, [mem_tile], 2, ofifo_memref_b_ty, [], []) - objectfifo( + inB = object_fifo("inB", shim_tile, mem_tile, 2, memref_b_ty) + memB = object_fifo( "memB", mem_tile, - [compute_tile2], + compute_tile2, 2, - ofifo_memref_b_ty, + memref_b_ty, [ (k // s, s * n * word_size_in // 4), (n // t, t * word_size_in // 4), (s, n * word_size_in // 4), (t * word_size_in // 4, 1), ], - [], ) - objectfifo_link(["inB"], ["memB"]) + object_fifo_link(inB, memB) # Output C - objectfifo("memC", compute_tile2, [mem_tile], 2, ofifo_memref_c_ty, [], []) - objectfifo( + memC = object_fifo("memC", compute_tile2, mem_tile, 2, memref_c_ty) + outC = object_fifo( "outC", mem_tile, - [shim_tile], + shim_tile, 2, - ofifo_memref_c_ty, + memref_c_ty, [ (m // r, r * n * word_size_out // 4), (r, t * word_size_out // 4), (n // t, r * t * word_size_out // 4), (t * word_size_out // 4, 1), ], - [], ) - objectfifo_link(["memC"], ["outC"]) + object_fifo_link(memC, outC) # Set up a circuit-switched flow from core to shim for tracing information if enable_tracing: @@ -147,30 +144,24 @@ def device_body(): def core_body(): for _ in for_(0xFFFFFFFF): for _ in for_(tiles): - elem_out = acquire( - ObjectFifoPort.Produce, "memC", 1, memref_c_ty - ).acquired_elem() + elem_out = memC.acquire(ObjectFifoPort.Produce, 1) if vectorized: - Call(zero, [elem_out]) + call(zero, [elem_out]) else: - Call(zero_scalar, [elem_out]) + call(zero_scalar, [elem_out]) for _ in for_(K_div_k): - elem_in_a = acquire( - ObjectFifoPort.Consume, "memA", 1, memref_a_ty - ).acquired_elem() - elem_in_b = acquire( - ObjectFifoPort.Consume, "memB", 1, memref_b_ty - ).acquired_elem() + elem_in_a = memA.acquire(ObjectFifoPort.Consume, 1) + elem_in_b = memB.acquire(ObjectFifoPort.Consume, 1) if vectorized: - Call(matmul, [elem_in_a, elem_in_b, elem_out]) + call(matmul, [elem_in_a, elem_in_b, elem_out]) else: - Call(matmul_scalar, [elem_in_a, elem_in_b, elem_out]) - objectfifo_release(ObjectFifoPort.Consume, "memA", 1) - objectfifo_release(ObjectFifoPort.Consume, "memB", 1) + call(matmul_scalar, [elem_in_a, elem_in_b, elem_out]) + memA.release(ObjectFifoPort.Consume, 1) + memB.release(ObjectFifoPort.Consume, 1) yield_([]) - objectfifo_release(ObjectFifoPort.Produce, "memC", 1) + memC.release(ObjectFifoPort.Produce, 1) yield_([]) yield_([]) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py index 42fec32aca..f2993bfa99 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py @@ -71,13 +71,6 @@ def device_body(): memRef_B_ty = T.memref(k, n, T.bf16()) memRef_C_ty = T.memref(m, n, T.bf16()) - ofifo_memRef_inA_ty = TypeAttr.get(ObjectFifoType.get(memRef_inA_ty)) - ofifo_memRef_inB_ty = TypeAttr.get(ObjectFifoType.get(memRef_inB_ty)) - ofifo_memRef_outC_ty = TypeAttr.get(ObjectFifoType.get(memRef_outC_ty)) - ofifo_memRef_A_ty = TypeAttr.get(ObjectFifoType.get(memRef_A_ty)) - ofifo_memRef_B_ty = TypeAttr.get(ObjectFifoType.get(memRef_B_ty)) - ofifo_memRef_C_ty = TypeAttr.get(ObjectFifoType.get(memRef_C_ty)) - # AIE Core Function declarations zero_scalar = external_func("zero_scalar_bf16", inputs=[memRef_C_ty]) zero = external_func("zero_bf16", inputs=[memRef_C_ty]) @@ -149,87 +142,105 @@ def device_body(): t_cores = [ [cores[j][i] for j in range(len(cores))] for i in range(len(cores[0])) ] - inA_fifos = ["inA0", "inA1", "inA2", "inA3"] - inB_fifos = ["inB0", "inB1", "inB2", "inB3"] - memA_fifos = ["memA0", "memA1", "memA2", "memA3"] - memB_fifos = ["memB0", "memB1", "memB2", "memB3"] - _0_outC_fifos = ["memC00", "memC10", "memC20", "memC30"] - _1_outC_fifos = ["memC01", "memC11", "memC21", "memC31"] - _2_outC_fifos = ["memC02", "memC12", "memC22", "memC32"] - _3_outC_fifos = ["memC03", "memC13", "memC23", "memC33"] + inA_fifo_names = ["inA0", "inA1", "inA2", "inA3"] + inA_fifos = {} + inB_fifo_names = ["inB0", "inB1", "inB2", "inB3"] + inB_fifos = {} + memA_fifo_names = ["memA0", "memA1", "memA2", "memA3"] + memA_fifos = {} + memB_fifo_names = ["memB0", "memB1", "memB2", "memB3"] + memB_fifos = {} + _0_outC_fifo_names = ["memC00", "memC10", "memC20", "memC30"] + _0_outC_fifos = {} + _1_outC_fifo_names = ["memC01", "memC11", "memC21", "memC31"] + _1_outC_fifos = {} + _2_outC_fifo_names = ["memC02", "memC12", "memC22", "memC32"] + _2_outC_fifos = {} + _3_outC_fifo_names = ["memC03", "memC13", "memC23", "memC33"] + _3_outC_fifos = {} + memC_fifo_names = [ + _0_outC_fifo_names, + _1_outC_fifo_names, + _2_outC_fifo_names, + _3_outC_fifo_names, + ] memC_fifos = [_0_outC_fifos, _1_outC_fifos, _2_outC_fifos, _3_outC_fifos] - outC_fifos = ["outC0", "outC1", "outC2", "outC3"] + outC_fifo_names = ["outC0", "outC1", "outC2", "outC3"] + outC_fifos = {} # AIE-array data movement with object fifos # Input A for i in range(n_cols): - objectfifo( - inA_fifos[i], shims[i], [mems[i]], 2, ofifo_memRef_inA_ty, [], [] + inA_fifos[inA_fifo_names[i]] = object_fifo( + inA_fifo_names[i], + shims[i], + mems[i], + 2, + memRef_inA_ty, ) - objectfifo( - memA_fifos[i], + memA_fifos[memA_fifo_names[i]] = object_fifo( + memA_fifo_names[i], mems[i], t_cores[i][0:n_cols], 2, - ofifo_memRef_A_ty, + memRef_A_ty, [ (m // r, r * k * word_size_in // 4), (k // s, s * word_size_in // 4), (r, k * word_size_in // 4), (s * word_size_in // 4, 1), ], - [], ) - objectfifo_link([inA_fifos[i]], [memA_fifos[i]]) + object_fifo_link(inA_fifo_names[i], memA_fifo_names[i]) # Input B for i in range(n_cols): - objectfifo( - inB_fifos[i], shims[i], [mems[i]], 2, ofifo_memRef_inB_ty, [], [] + inB_fifos[inB_fifo_names[i]] = object_fifo( + inB_fifo_names[i], + shims[i], + mems[i], + 2, + memRef_inB_ty, ) - objectfifo( - memB_fifos[i], + memB_fifos[memB_fifo_names[i]] = object_fifo( + memB_fifo_names[i], mems[i], cores[i][0:n_rows], 2, - ofifo_memRef_B_ty, + memRef_B_ty, [ (k // s, s * n * word_size_in // 4), (n // t, t * word_size_in // 4), (s, n * word_size_in // 4), (t * word_size_in // 4, 1), ], - [], ) - objectfifo_link([inB_fifos[i]], [memB_fifos[i]]) + object_fifo_link(inB_fifo_names[i], memB_fifo_names[i]) # Output C for i in range(n_cols): for j in range(n_rows): - objectfifo( - memC_fifos[i][j], + memC_fifos[i][memC_fifo_names[i][j]] = object_fifo( + memC_fifo_names[i][j], cores[i][j], - [mems[i]], + mems[i], 2, - ofifo_memRef_C_ty, - [], - [], + memRef_C_ty, ) - objectfifo( - outC_fifos[i], + outC_fifos[outC_fifo_names[i]] = object_fifo( + outC_fifo_names[i], mems[i], shims[i], 2, - ofifo_memRef_outC_ty, + memRef_outC_ty, [ (m // r, r * n * word_size_out // 4), (r, t * word_size_out // 4), (n // t, r * t * word_size_out // 4), (t * word_size_out // 4, 1), ], - [], ) - objectfifo_link(memC_fifos[i], [outC_fifos[i]]) + object_fifo_link(memC_fifo_names[i], outC_fifo_names[i]) # Set up compute tiles for j in range(n_cols): @@ -239,38 +250,32 @@ def device_body(): def core_body(): for _ in for_(0xFFFFFFFF): for _ in for_(tiles): - elem_out = acquire( + elem_out = memC_fifos[j][memC_fifo_names[j][i]].acquire( ObjectFifoPort.Produce, - memC_fifos[j][i], 1, - memRef_C_ty, - ).acquired_elem() - Call(zero, [elem_out]) + ) + call(zero, [elem_out]) for _ in for_(K_div_k): - elem_in_a = acquire( + elem_in_a = memA_fifos[memA_fifo_names[i]].acquire( ObjectFifoPort.Consume, - memA_fifos[i], 1, - memRef_A_ty, - ).acquired_elem() - elem_in_b = acquire( + ) + elem_in_b = memB_fifos[memB_fifo_names[j]].acquire( ObjectFifoPort.Consume, - memB_fifos[j], 1, - memRef_B_ty, - ).acquired_elem() - Call(matmul, [elem_in_a, elem_in_b, elem_out]) - objectfifo_release( - ObjectFifoPort.Consume, memA_fifos[i], 1 ) - objectfifo_release( - ObjectFifoPort.Consume, memB_fifos[j], 1 + call(matmul, [elem_in_a, elem_in_b, elem_out]) + memA_fifos[memA_fifo_names[i]].release( + ObjectFifoPort.Consume, 1 + ) + memB_fifos[memB_fifo_names[j]].release( + ObjectFifoPort.Consume, 1 ) yield_([]) - objectfifo_release( - ObjectFifoPort.Produce, memC_fifos[j][i], 1 + memC_fifos[j][memC_fifo_names[j][i]].release( + ObjectFifoPort.Produce, 1 ) yield_([]) yield_([]) @@ -301,7 +306,7 @@ def sequence(A, B, C): C_col_offset = i * n * word_size_out C_offset_in_i32s = (C_col_offset + C_row_offset) // 4 ipu_dma_memcpy_nd( - metadata=outC_fifos[i], + metadata=outC_fifo_names[i], bd_id=0, mem=C, offsets=[0, 0, 0, C_offset_in_i32s], @@ -329,7 +334,7 @@ def sequence(A, B, C): A_col_offset_in_i32s = i * m * K * word_size_in // 4 B_col_offset_in_i32s = i * n * word_size_in // 4 ipu_dma_memcpy_nd( - metadata=inA_fifos[i], + metadata=inA_fifo_names[i], bd_id=2 * tile_row + 1, mem=A, offsets=[ @@ -342,7 +347,7 @@ def sequence(A, B, C): strides=[0, k_in_i32s, K_in_i32s], ) ipu_dma_memcpy_nd( - metadata=inB_fifos[i], + metadata=inB_fifo_names[i], bd_id=2 * tile_row + 2, mem=B, offsets=[0, 0, 0, B_col_offset_in_i32s], diff --git a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py index 3d37e04716..5838dc598b 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py @@ -66,13 +66,6 @@ def device_body(): memRef_B_ty = T.memref(k, n, T.bf16()) memRef_C_ty = T.memref(m, n, T.f32()) - ofifo_memRef_inA_ty = TypeAttr.get(ObjectFifoType.get(memRef_inA_ty)) - ofifo_memRef_inB_ty = TypeAttr.get(ObjectFifoType.get(memRef_inB_ty)) - ofifo_memRef_outC_ty = TypeAttr.get(ObjectFifoType.get(memRef_outC_ty)) - ofifo_memRef_A_ty = TypeAttr.get(ObjectFifoType.get(memRef_A_ty)) - ofifo_memRef_B_ty = TypeAttr.get(ObjectFifoType.get(memRef_B_ty)) - ofifo_memRef_C_ty = TypeAttr.get(ObjectFifoType.get(memRef_C_ty)) - # AIE Core Function declarations zero_scalar = external_func("zero_scalar_f32", inputs=[memRef_C_ty]) zero = external_func("zero_f32", inputs=[memRef_C_ty]) @@ -91,68 +84,68 @@ def device_body(): ComputeTile4 = tile(0, 4) ComputeTile5 = tile(0, 5) cores = [ComputeTile2, ComputeTile3, ComputeTile4, ComputeTile5] - inA_fifos = ["memA0", "memA1", "memA2", "memA3"] - inB_fifos = ["memB"] - outC_fifos = ["memC0", "memC1", "memC2", "memC3"] + inA_fifo_names = ["memA0", "memA1", "memA2", "memA3"] + inA_fifos = {} + inB_fifo_names = ["memB"] + inB_fifos = {} + outC_fifo_names = ["memC0", "memC1", "memC2", "memC3"] + outC_fifos = {} # AIE-array data movement with object fifos # Input A - objectfifo("inA", ShimTile, [MemTile], 2, ofifo_memRef_inA_ty, [], []) + inA = object_fifo("inA", ShimTile, MemTile, 2, memRef_inA_ty) for i in range(n_cores): - objectfifo( - inA_fifos[i], + inA_fifos[inA_fifo_names[i]] = object_fifo( + inA_fifo_names[i], MemTile, - [cores[i]], + cores[i], 2, - ofifo_memRef_A_ty, + memRef_A_ty, [ (m // r, r * k * word_size_in // 4), (k // s, s * word_size_in // 4), (r, k * word_size_in // 4), (s * word_size_in // 4, 1), ], - [], ) - objectfifo_link(["inA"], inA_fifos[0:n_cores]) + object_fifo_link(inA, inA_fifo_names[0:n_cores]) # Input B - objectfifo("inB", ShimTile, [MemTile], 2, ofifo_memRef_inB_ty, [], []) - objectfifo( - inB_fifos[0], + inB = object_fifo("inB", ShimTile, MemTile, 2, memRef_inB_ty) + inB_fifos[inB_fifo_names[0]] = object_fifo( + inB_fifo_names[0], MemTile, cores[0:n_cores], 2, - ofifo_memRef_B_ty, + memRef_B_ty, [ (k // s, s * n * word_size_in // 4), (n // t, t * word_size_in // 4), (s, n * word_size_in // 4), (t * word_size_in // 4, 1), ], - [], ) - objectfifo_link(["inB"], [inB_fifos[0]]) + object_fifo_link(inB, [inB_fifo_names[0]]) # Output C for i in range(n_cores): - objectfifo( - outC_fifos[i], cores[i], [MemTile], 2, ofifo_memRef_C_ty, [], [] + outC_fifos[outC_fifo_names[i]] = object_fifo( + outC_fifo_names[i], cores[i], MemTile, 2, memRef_C_ty ) - objectfifo( + outC = object_fifo( "outC", MemTile, - [ShimTile], + ShimTile, 2, - ofifo_memRef_outC_ty, + memRef_outC_ty, [ (m // r, r * n * word_size_out // 4), (r, t * word_size_out // 4), (n // t, r * t * word_size_out // 4), (t * word_size_out // 4, 1), ], - [], ) - objectfifo_link(outC_fifos[0:n_cores], ["outC"]) + object_fifo_link(outC_fifo_names[0:n_cores], outC) # Set up compute tiles for i in range(n_cores): @@ -161,28 +154,30 @@ def device_body(): def core_body(): for _ in for_(0xFFFFFFFF): for _ in for_(tiles): - elem_out = acquire( - ObjectFifoPort.Produce, outC_fifos[i], 1, memRef_C_ty - ).acquired_elem() - Call(zero, [elem_out]) + elem_out = outC_fifos[outC_fifo_names[i]].acquire( + ObjectFifoPort.Produce, 1 + ) + call(zero, [elem_out]) for _ in for_(K_div_k): - elem_in_a = acquire( - ObjectFifoPort.Consume, inA_fifos[i], 1, memRef_A_ty - ).acquired_elem() - elem_in_b = acquire( - ObjectFifoPort.Consume, inB_fifos[0], 1, memRef_B_ty - ).acquired_elem() - Call(matmul, [elem_in_a, elem_in_b, elem_out]) - objectfifo_release( - ObjectFifoPort.Consume, inA_fifos[i], 1 + elem_in_a = inA_fifos[inA_fifo_names[i]].acquire( + ObjectFifoPort.Consume, 1 + ) + elem_in_b = inB_fifos[inB_fifo_names[0]].acquire( + ObjectFifoPort.Consume, 1 + ) + call(matmul, [elem_in_a, elem_in_b, elem_out]) + inA_fifos[inA_fifo_names[i]].release( + ObjectFifoPort.Consume, 1 ) - objectfifo_release( - ObjectFifoPort.Consume, inB_fifos[0], 1 + inB_fifos[inB_fifo_names[0]].release( + ObjectFifoPort.Consume, 1 ) yield_([]) - objectfifo_release(ObjectFifoPort.Produce, outC_fifos[i], 1) + outC_fifos[outC_fifo_names[i]].release( + ObjectFifoPort.Produce, 1 + ) yield_([]) yield_([]) diff --git a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py index a9cb6b427c..fb4a8e97e8 100644 --- a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py @@ -49,11 +49,6 @@ def device_body(): memRef_outC_ty = T.memref(m, T.f32()) memRef_A_ty = T.memref(m, k, T.bf16()) - ofifo_memRef_inA_ty = TypeAttr.get(ObjectFifoType.get(memRef_inA_ty)) - ofifo_memRef_inB_ty = TypeAttr.get(ObjectFifoType.get(memRef_inB_ty)) - ofifo_memRef_outC_ty = TypeAttr.get(ObjectFifoType.get(memRef_outC_ty)) - ofifo_memRef_A_ty = TypeAttr.get(ObjectFifoType.get(memRef_A_ty)) - # AIE Core Function declarations zero_scalar = external_func("zero_scalar_f32", inputs=[memRef_outC_ty]) zero = external_func("zero_vectorized_f32", inputs=[memRef_outC_ty]) @@ -82,59 +77,58 @@ def device_body(): ComputeTile2 = tile(2, 2) ComputeTile3 = tile(3, 2) cores = [ComputeTile0, ComputeTile1, ComputeTile2, ComputeTile3] - memA_fifos = ["memA0", "memA1", "memA2", "memA3"] - inA_fifos = ["inA0", "inA1", "inA2", "inA3"] - inB_fifos = ["inB"] - outC_fifos = ["outC0", "outC1", "outC2", "outC3"] + memA_fifo_names = ["memA0", "memA1", "memA2", "memA3"] + memA_fifos = {} + inA_fifo_names = ["inA0", "inA1", "inA2", "inA3"] + inA_fifos = {} + inB_fifo_names = ["inB"] + inB_fifos = {} + outC_fifo_names = ["outC0", "outC1", "outC2", "outC3"] + outC_fifos = {} # AIE-array data movement with object fifos # Input A for i in range(n_cores): - objectfifo( - memA_fifos[i], + memA_fifos[memA_fifo_names[i]] = object_fifo( + memA_fifo_names[i], ShimTiles[i], - [MemTiles[i]], + MemTiles[i], 2, - ofifo_memRef_inA_ty, - [], - [], + memRef_inA_ty, ) - objectfifo( - inA_fifos[i], + inA_fifos[inA_fifo_names[i]] = object_fifo( + inA_fifo_names[i], MemTiles[i], - [cores[i]], + cores[i], 2, - ofifo_memRef_A_ty, + memRef_A_ty, [ (k_in_i32s, 1), (m, k_in_i32s), (1, 1), ], - [], ) - objectfifo_link([memA_fifos[i]], [inA_fifos[i]]) + object_fifo_link( + memA_fifos[memA_fifo_names[i]], inA_fifos[inA_fifo_names[i]] + ) # Input B - objectfifo( - inB_fifos[0], + inB_fifos[inB_fifo_names[0]] = object_fifo( + inB_fifo_names[0], ShimTiles[1 % n_cores], cores[0:n_cores], 2, - ofifo_memRef_inB_ty, - [], - [], + memRef_inB_ty, ) # Output C for i in range(n_cores): - objectfifo( - outC_fifos[i], + outC_fifos[outC_fifo_names[i]] = object_fifo( + outC_fifo_names[i], cores[i], - [ShimTiles[i]], + ShimTiles[i], 2, - ofifo_memRef_outC_ty, - [], - [], + memRef_outC_ty, ) # Set up compute tiles @@ -143,24 +137,36 @@ def device_body(): @core(cores[i], "mv.o") def core_body(): for _ in for_(0xFFFFFFFF): - elem_out = acquire( - ObjectFifoPort.Produce, outC_fifos[i], 1, memRef_outC_ty - ).acquired_elem() - Call(zero, [elem_out]) + elem_out = outC_fifos[outC_fifo_names[i]].acquire( + ObjectFifoPort.Produce, + 1, + ) + call(zero, [elem_out]) for _ in for_(K_div_k): - elem_in_a = acquire( - ObjectFifoPort.Consume, inA_fifos[i], 1, memRef_A_ty - ).acquired_elem() - elem_in_b = acquire( - ObjectFifoPort.Consume, inB_fifos[0], 1, memRef_inB_ty - ).acquired_elem() - Call(matvec, [elem_in_a, elem_in_b, elem_out]) - objectfifo_release(ObjectFifoPort.Consume, inA_fifos[i], 1) - objectfifo_release(ObjectFifoPort.Consume, inB_fifos[0], 1) + elem_in_a = inA_fifos[inA_fifo_names[i]].acquire( + ObjectFifoPort.Consume, + 1, + ) + elem_in_b = inB_fifos[inB_fifo_names[0]].acquire( + ObjectFifoPort.Consume, + 1, + ) + call(matvec, [elem_in_a, elem_in_b, elem_out]) + inA_fifos[inA_fifo_names[i]].release( + ObjectFifoPort.Consume, + 1, + ) + inB_fifos[inB_fifo_names[0]].release( + ObjectFifoPort.Consume, + 1, + ) yield_([]) - objectfifo_release(ObjectFifoPort.Produce, outC_fifos[i], 1) + outC_fifos[outC_fifo_names[i]].release( + ObjectFifoPort.Produce, + 1, + ) yield_([]) # To/from AIE-array data movement @@ -172,7 +178,7 @@ def core_body(): ) def sequence(A, B, C): ipu_dma_memcpy_nd( - metadata="inB", + metadata=inB_fifo_names[0], bd_id=2, mem=B, sizes=[M_div_m_div_n_cores, 1, 1, K_in_i32s], @@ -182,7 +188,7 @@ def sequence(A, B, C): A_offset = i * M_div_m_div_n_cores * m * K * word_size_in // 4 C_offset = i * M_div_m_div_n_cores * m * word_size_out // 4 ipu_dma_memcpy_nd( - metadata=memA_fifos[i], + metadata=memA_fifo_names[i], bd_id=1, mem=A, offsets=[0, 0, 0, A_offset], @@ -190,7 +196,7 @@ def sequence(A, B, C): strides=[m_x_K_in_i32s, k_in_i32s, K_in_i32s], ) ipu_dma_memcpy_nd( - metadata=outC_fifos[i], + metadata=outC_fifo_names[i], bd_id=0, mem=C, offsets=[0, 0, 0, C_offset], diff --git a/reference_designs/ipu-xrt/passthrough_hardware/aie2.py b/reference_designs/ipu-xrt/passthrough_hardware/aie2.py index 773f1f8443..cf2e665af4 100755 --- a/reference_designs/ipu-xrt/passthrough_hardware/aie2.py +++ b/reference_designs/ipu-xrt/passthrough_hardware/aie2.py @@ -26,16 +26,15 @@ def my_passthrough(): @device(AIEDevice.ipu) def device_body(): memRef_ty = T.memref(1024, T.i32()) - ofifo_memRef_ty = TypeAttr.get(ObjectFifoType.get(memRef_ty)) # Tile declarations ShimTile = tile(0, 0) ComputeTile2 = tile(0, 2) # AIE-array data movement with object fifos - objectfifo("in", ShimTile, [ComputeTile2], 2, ofifo_memRef_ty, [], []) - objectfifo("out", ComputeTile2, [ShimTile], 2, ofifo_memRef_ty, [], []) - objectfifo_link(["in"], ["out"]) + of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) + object_fifo_link(of_in, of_out) # Set up compute tiles diff --git a/reference_designs/ipu-xrt/vector_scalar/aie2.py b/reference_designs/ipu-xrt/vector_scalar/aie2.py index 7ecffe8e5f..851eae41cc 100755 --- a/reference_designs/ipu-xrt/vector_scalar/aie2.py +++ b/reference_designs/ipu-xrt/vector_scalar/aie2.py @@ -25,7 +25,6 @@ def my_vector_scalar(): @device(AIEDevice.ipu) def device_body(): memRef_ty = T.memref(n, T.i32()) - ofifo_memRef_ty = TypeAttr.get(ObjectFifoType.get(memRef_ty)) # AIE Core Function declarations scale_int32 = external_func("scale_int32", inputs=[memRef_ty, memRef_ty]) @@ -35,12 +34,8 @@ def device_body(): ComputeTile2 = tile(0, 2) # AIE-array data movement with object fifos - objectfifo( - "in", ShimTile, [ComputeTile2], buffer_depth, ofifo_memRef_ty, [], [] - ) - objectfifo( - "out", ComputeTile2, [ShimTile], buffer_depth, ofifo_memRef_ty, [], [] - ) + of_in = object_fifo("in", ShimTile, ComputeTile2, buffer_depth, memRef_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, buffer_depth, memRef_ty) # Set up compute tiles @@ -51,15 +46,11 @@ def core_body(): for _ in for_(sys.maxsize): # Number of sub-vector "tile" iterations for _ in for_(N_div_n): - elem_out = acquire( - ObjectFifoPort.Produce, "out", 1, memRef_ty - ).acquired_elem() - elem_in = acquire( - ObjectFifoPort.Consume, "in", 1, memRef_ty - ).acquired_elem() - Call(scale_int32, [elem_in, elem_out]) - objectfifo_release(ObjectFifoPort.Consume, "in", 1) - objectfifo_release(ObjectFifoPort.Produce, "out", 1) + elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) + elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) + call(scale_int32, [elem_in, elem_out]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) yield_([]) yield_([]) diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py b/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py index af8785aa17..a14dc5f0b8 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py +++ b/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py @@ -70,110 +70,36 @@ def deviceBody(): # AIE-array data movement with object fifos # Input - objectfifo( + inOF_L3L2 = object_fifo( "inOF_L3L2", ShimTile, [ComputeTile2, MemTile], [2, 2, 6], - ofifo_line_bytes_ty, - [], - [], + line_bytes_ty, ) - objectfifo( - "inOF_L2L1", - MemTile, - [ComputeTile5], - 6, - ofifo_line_bytes_ty, - [], - [], + inOF_L2L1 = object_fifo( + "inOF_L2L1", MemTile, ComputeTile5, 6, line_bytes_ty ) - objectfifo_link(["inOF_L3L2"], ["inOF_L2L1"]) + object_fifo_link(inOF_L3L2, inOF_L2L1) # Output - objectfifo( - "outOF_L2L3", - MemTile, - [ShimTile], - 2, - ofifo_line_bytes_ty, - [], - [], + outOF_L2L3 = object_fifo("outOF_L2L3", MemTile, ShimTile, 2, line_bytes_ty) + outOF_L1L2 = object_fifo( + "outOF_L1L2", ComputeTile5, MemTile, 2, line_bytes_ty ) - objectfifo( - "outOF_L1L2", - ComputeTile5, - [MemTile], - 2, - ofifo_line_bytes_ty, - [], - [], - ) - objectfifo_link(["outOF_L1L2"], ["outOF_L2L3"]) + object_fifo_link(outOF_L1L2, outOF_L2L3) # Intermediate - objectfifo( - "OF_2to34", - ComputeTile2, - [ComputeTile3, ComputeTile4], - 2, - ofifo_line_ty, - [], - [], - ) - objectfifo( - "OF_3to3", - ComputeTile3, - [ComputeTile3], - 1, - ofifo_line_ty, - [], - [], - ) - objectfifo( - "OF_3to5", - ComputeTile3, - [ComputeTile5], - 2, - ofifo_line_ty, - [], - [], - ) - objectfifo( - "OF_4to4", - ComputeTile4, - [ComputeTile4], - 1, - ofifo_line_ty, - [], - [], - ) - objectfifo( - "OF_4to5", - ComputeTile4, - [ComputeTile5], - 2, - ofifo_line_ty, - [], - [], - ) - objectfifo( - "OF_5to5a", - ComputeTile5, - [ComputeTile5], - 1, - ofifo_line_ty, - [], - [], + OF_2to34 = object_fifo( + "OF_2to34", ComputeTile2, [ComputeTile3, ComputeTile4], 2, line_ty ) - objectfifo( - "OF_5to5b", - ComputeTile5, - [ComputeTile5], - 1, - ofifo_line_bytes_ty, - [], - [], + OF_3to3 = object_fifo("OF_3to3", ComputeTile3, ComputeTile3, 1, line_ty) + OF_3to5 = object_fifo("OF_3to5", ComputeTile3, ComputeTile5, 2, line_ty) + OF_4to4 = object_fifo("OF_4to4", ComputeTile4, ComputeTile4, 1, line_ty) + OF_4to5 = object_fifo("OF_4to5", ComputeTile4, ComputeTile5, 2, line_ty) + OF_5to5a = object_fifo("OF_5to5a", ComputeTile5, ComputeTile5, 1, line_ty) + OF_5to5b = object_fifo( + "OF_5to5b", ComputeTile5, ComputeTile5, 1, line_bytes_ty ) # Set up compute tiles @@ -182,15 +108,11 @@ def deviceBody(): @core(ComputeTile2, "rgba2hue.cc.o") def coreBody(): for _ in range_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, "inOF_L3L2", 1, line_bytes_ty - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, "OF_2to34", 1, line_ty - ).acquired_elem() - Call(rgba2hueLine, [elemIn, elemOut, arith.constant(lineWidth)]) - objectfifo_release(ObjectFifoPort.Consume, "inOF_L3L2", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_2to34", 1) + elemIn = inOF_L3L2.acquire(ObjectFifoPort.Consume, 1) + elemOut = OF_2to34.acquire(ObjectFifoPort.Produce, 1) + call(rgba2hueLine, [elemIn, elemOut, arith.constant(lineWidth)]) + inOF_L3L2.release(ObjectFifoPort.Consume, 1) + OF_2to34.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 3 @@ -202,13 +124,9 @@ def coreBody(): thresholdModeToZeroInv = arith.constant(4, T.i8()) thresholdModeBinary = arith.constant(0, T.i8()) for _ in range_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, "OF_2to34", 1, line_ty - ).acquired_elem() - elemOutTmp = acquire( - ObjectFifoPort.Produce, "OF_3to3", 1, line_ty - ).acquired_elem() - Call( + elemIn = OF_2to34.acquire(ObjectFifoPort.Consume, 1) + elemOutTmp = OF_3to3.acquire(ObjectFifoPort.Produce, 1) + call( thresholdLine, [ elemIn, @@ -219,15 +137,11 @@ def coreBody(): thresholdModeToZeroInv, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_2to34", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to3", 1) - elemInTmp = acquire( - ObjectFifoPort.Consume, "OF_3to3", 1, line_ty - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, "OF_3to5", 1, line_ty - ).acquired_elem() - Call( + OF_2to34.release(ObjectFifoPort.Consume, 1) + OF_3to3.release(ObjectFifoPort.Produce, 1) + elemInTmp = OF_3to3.acquire(ObjectFifoPort.Consume, 1) + elemOut = OF_3to5.acquire(ObjectFifoPort.Produce, 1) + call( thresholdLine, [ elemInTmp, @@ -238,8 +152,8 @@ def coreBody(): thresholdModeBinary, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_3to3", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to5", 1) + OF_3to3.release(ObjectFifoPort.Consume, 1) + OF_3to5.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 4 @@ -251,13 +165,9 @@ def coreBody(): thresholdModeToZeroInv = arith.constant(4, T.i8()) thresholdModeBinary = arith.constant(0, T.i8()) for _ in range_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, "OF_2to34", 1, line_ty - ).acquired_elem() - elemOutTmp = acquire( - ObjectFifoPort.Produce, "OF_4to4", 1, line_ty - ).acquired_elem() - Call( + elemIn = OF_2to34.acquire(ObjectFifoPort.Consume, 1) + elemOutTmp = OF_4to4.acquire(ObjectFifoPort.Produce, 1) + call( thresholdLine, [ elemIn, @@ -268,15 +178,11 @@ def coreBody(): thresholdModeToZeroInv, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_2to34", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_4to4", 1) - elemInTmp = acquire( - ObjectFifoPort.Consume, "OF_4to4", 1, line_ty - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, "OF_4to5", 1, line_ty - ).acquired_elem() - Call( + OF_2to34.release(ObjectFifoPort.Consume, 1) + OF_4to4.release(ObjectFifoPort.Produce, 1) + elemInTmp = OF_4to4.acquire(ObjectFifoPort.Consume, 1) + elemOut = OF_4to5.acquire(ObjectFifoPort.Produce, 1) + call( thresholdLine, [ elemInTmp, @@ -287,8 +193,8 @@ def coreBody(): thresholdModeBinary, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_4to4", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_4to5", 1) + OF_4to4.release(ObjectFifoPort.Consume, 1) + OF_4to5.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 5 @@ -296,46 +202,30 @@ def coreBody(): def coreBody(): for _ in range_(sys.maxsize): # bitwise OR - elemIn1 = acquire( - ObjectFifoPort.Consume, "OF_3to5", 1, line_ty - ).acquired_elem() - elemIn2 = acquire( - ObjectFifoPort.Consume, "OF_4to5", 1, line_ty - ).acquired_elem() - elemOutTmpA = acquire( - ObjectFifoPort.Produce, "OF_5to5a", 1, line_ty - ).acquired_elem() - Call( + elemIn1 = OF_3to5.acquire(ObjectFifoPort.Consume, 1) + elemIn2 = OF_4to5.acquire(ObjectFifoPort.Consume, 1) + elemOutTmpA = OF_5to5a.acquire(ObjectFifoPort.Produce, 1) + call( bitwiseORLine, [elemIn1, elemIn2, elemOutTmpA, arith.constant(lineWidth)], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_3to5", 1) - objectfifo_release(ObjectFifoPort.Consume, "OF_4to5", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_5to5a", 1) + OF_3to5.release(ObjectFifoPort.Consume, 1) + OF_4to5.release(ObjectFifoPort.Consume, 1) + OF_5to5a.release(ObjectFifoPort.Produce, 1) # gray2rgba - elemInTmpA = acquire( - ObjectFifoPort.Consume, "OF_5to5a", 1, line_ty - ).acquired_elem() - elemOutTmpB = acquire( - ObjectFifoPort.Produce, "OF_5to5b", 1, line_bytes_ty - ).acquired_elem() - Call( + elemInTmpA = OF_5to5a.acquire(ObjectFifoPort.Consume, 1) + elemOutTmpB = OF_5to5b.acquire(ObjectFifoPort.Produce, 1) + call( gray2rgbaLine, [elemInTmpA, elemOutTmpB, arith.constant(lineWidth)], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_5to5a", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_5to5b", 1) + OF_5to5a.release(ObjectFifoPort.Consume, 1) + OF_5to5b.release(ObjectFifoPort.Produce, 1) # bitwise AND - elemInTmpB1 = acquire( - ObjectFifoPort.Consume, "OF_5to5b", 1, line_bytes_ty - ).acquired_elem() - elemInTmpB2 = acquire( - ObjectFifoPort.Consume, "inOF_L2L1", 1, line_bytes_ty - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, "outOF_L1L2", 1, line_bytes_ty - ).acquired_elem() - Call( + elemInTmpB1 = OF_5to5b.acquire(ObjectFifoPort.Consume, 1) + elemInTmpB2 = inOF_L2L1.acquire(ObjectFifoPort.Consume, 1) + elemOut = outOF_L1L2.acquire(ObjectFifoPort.Produce, 1) + call( bitwiseANDLine, [ elemInTmpB1, @@ -344,9 +234,9 @@ def coreBody(): arith.constant(lineWidthInBytes), ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_5to5b", 1) - objectfifo_release(ObjectFifoPort.Consume, "inOF_L2L1", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOF_L1L2", 1) + OF_5to5b.release(ObjectFifoPort.Consume, 1) + inOF_L2L1.release(ObjectFifoPort.Consume, 1) + outOF_L1L2.release(ObjectFifoPort.Produce, 1) yield_([]) # To/from AIE-array data movement diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py index 3c43c605dc..6a49466b64 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py +++ b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py @@ -34,12 +34,6 @@ def color_threshold(): def device_body(): line_channels_ty = T.memref(lineWidthChannels, T.ui8()) line_ty = T.memref(lineWidth, T.ui8()) - ofifo_line_channels_ty = TypeAttr.get( - ObjectFifoType.get(T.memref(lineWidthChannels, T.ui8())) - ) - ofifo_line_ty = TypeAttr.get( - ObjectFifoType.get(T.memref(lineWidth, T.ui8())) - ) # AIE Core Function declarations thresholdLine = external_func( @@ -58,45 +52,45 @@ def device_body(): # AIE-array data movement with object fifos # Input RGBA broadcast + memtile for skip - objectfifo( - "inOOB_L3L2", ShimTile, [MemTile], 2, ofifo_line_channels_ty, [], [] + inOOB_L3L2 = object_fifo( + "inOOB_L3L2", ShimTile, MemTile, 2, line_channels_ty ) - objectfifo( - "inOOB_L2L1_0", MemTile, [ComputeTile2], 2, ofifo_line_ty, [], [] + inOOB_L2L1_0 = object_fifo( + "inOOB_L2L1_0", MemTile, ComputeTile2, 2, line_ty ) - objectfifo( - "inOOB_L2L1_1", MemTile, [ComputeTile3], 2, ofifo_line_ty, [], [] + inOOB_L2L1_1 = object_fifo( + "inOOB_L2L1_1", MemTile, ComputeTile3, 2, line_ty ) - objectfifo( - "inOOB_L2L1_2", MemTile, [ComputeTile4], 2, ofifo_line_ty, [], [] + inOOB_L2L1_2 = object_fifo( + "inOOB_L2L1_2", MemTile, ComputeTile4, 2, line_ty ) - objectfifo( - "inOOB_L2L1_3", MemTile, [ComputeTile5], 2, ofifo_line_ty, [], [] + inOOB_L2L1_3 = object_fifo( + "inOOB_L2L1_3", MemTile, ComputeTile5, 2, line_ty ) - objectfifo_link( - ["inOOB_L3L2"], - ["inOOB_L2L1_0", "inOOB_L2L1_1", "inOOB_L2L1_2", "inOOB_L2L1_3"], + object_fifo_link( + inOOB_L3L2, + [inOOB_L2L1_0, inOOB_L2L1_1, inOOB_L2L1_2, inOOB_L2L1_3], ) # Output RGBA - objectfifo( - "outOOB_L2L3", MemTile, [ShimTile], 2, ofifo_line_channels_ty, [], [] + outOOB_L2L3 = object_fifo( + "outOOB_L2L3", MemTile, ShimTile, 2, line_channels_ty ) - objectfifo( - "outOOB_L1L2_0", ComputeTile2, [MemTile], 2, ofifo_line_ty, [], [] + outOOB_L1L2_0 = object_fifo( + "outOOB_L1L2_0", ComputeTile2, MemTile, 2, line_ty ) - objectfifo( - "outOOB_L1L2_1", ComputeTile3, [MemTile], 2, ofifo_line_ty, [], [] + outOOB_L1L2_1 = object_fifo( + "outOOB_L1L2_1", ComputeTile3, MemTile, 2, line_ty ) - objectfifo( - "outOOB_L1L2_2", ComputeTile4, [MemTile], 2, ofifo_line_ty, [], [] + outOOB_L1L2_2 = object_fifo( + "outOOB_L1L2_2", ComputeTile4, MemTile, 2, line_ty ) - objectfifo( - "outOOB_L1L2_3", ComputeTile5, [MemTile], 2, ofifo_line_ty, [], [] + outOOB_L1L2_3 = object_fifo( + "outOOB_L1L2_3", ComputeTile5, MemTile, 2, line_ty ) - objectfifo_link( - ["outOOB_L1L2_0", "outOOB_L1L2_1", "outOOB_L1L2_2", "outOOB_L1L2_3"], - ["outOOB_L2L3"], + object_fifo_link( + [outOOB_L1L2_0, outOOB_L1L2_1, outOOB_L1L2_2, outOOB_L1L2_3], + outOOB_L2L3, ) # Runtime parameters @@ -112,18 +106,8 @@ def device_body(): def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, - "inOOB_L2L1_0", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, - "outOOB_L1L2_0", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() + elemIn = inOOB_L2L1_0.acquire(ObjectFifoPort.Consume, 1) + elemOut = outOOB_L1L2_0.acquire(ObjectFifoPort.Produce, 1) # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires @@ -137,7 +121,7 @@ def core_body(): # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8()) - Call( + call( thresholdLine, [ elemIn, @@ -149,8 +133,8 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "inOOB_L2L1_0", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOOB_L1L2_0", 1) + inOOB_L2L1_0.release(ObjectFifoPort.Consume, 1) + outOOB_L1L2_0.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 3 @@ -158,18 +142,8 @@ def core_body(): def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, - "inOOB_L2L1_1", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, - "outOOB_L1L2_1", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() + elemIn = inOOB_L2L1_1.acquire(ObjectFifoPort.Consume, 1) + elemOut = outOOB_L1L2_1.acquire(ObjectFifoPort.Produce, 1) # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires thresholdValue = arith.trunci( @@ -182,7 +156,7 @@ def core_body(): # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8()) - Call( + call( thresholdLine, [ elemIn, @@ -194,8 +168,8 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "inOOB_L2L1_1", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOOB_L1L2_1", 1) + inOOB_L2L1_1.release(ObjectFifoPort.Consume, 1) + outOOB_L1L2_1.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 4 @@ -203,18 +177,8 @@ def core_body(): def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, - "inOOB_L2L1_2", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, - "outOOB_L1L2_2", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() + elemIn = inOOB_L2L1_2.acquire(ObjectFifoPort.Consume, 1) + elemOut = outOOB_L1L2_2.acquire(ObjectFifoPort.Produce, 1) # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires @@ -228,7 +192,7 @@ def core_body(): # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8()) - Call( + call( thresholdLine, [ elemIn, @@ -240,8 +204,8 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "inOOB_L2L1_2", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOOB_L1L2_2", 1) + inOOB_L2L1_2.release(ObjectFifoPort.Consume, 1) + outOOB_L1L2_2.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 5 @@ -249,18 +213,8 @@ def core_body(): def core_body(): # for _ in for_(4096): for _ in for_(sys.maxsize): - elemIn = acquire( - ObjectFifoPort.Consume, - "inOOB_L2L1_3", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() - elemOut = acquire( - ObjectFifoPort.Produce, - "outOOB_L1L2_3", - 1, - T.memref(lineWidth, T.ui8()), - ).acquired_elem() + elemIn = inOOB_L2L1_3.acquire(ObjectFifoPort.Consume, 1) + elemOut = outOOB_L1L2_3.acquire(ObjectFifoPort.Produce, 1) # RTPs written from the instruction stream must be read right before the kernel # after the ObjectFIFO acquires @@ -274,7 +228,7 @@ def core_body(): # maxValue = arith.constant(255, T.i16()) # thresholdValue = arith.constant(50, T.i16()) # thresholdType = arith.constant(0, T.i8() - Call( + call( thresholdLine, [ elemIn, @@ -286,8 +240,8 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "inOOB_L2L1_3", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOOB_L1L2_3", 1) + inOOB_L2L1_3.release(ObjectFifoPort.Consume, 1) + outOOB_L1L2_3.release(ObjectFifoPort.Produce, 1) yield_([]) # To/from AIE-array data movement diff --git a/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py b/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py index 0a7cfbc696..dd481cfc65 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py +++ b/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py @@ -38,9 +38,6 @@ def device_body(): line_ty = T.memref(lineWidth, T.ui8()) memRef_3x3_ty = T.memref(3, 3, T.i16()) - ofifo_line_bytes_ty = TypeAttr.get(ObjectFifoType.get(line_bytes_ty)) - ofifo_line_ty = TypeAttr.get(ObjectFifoType.get(line_ty)) - # AIE Core Function declarations rgba2gray_line = external_func( "rgba2grayLine", inputs=[line_bytes_ty, line_ty, T.i32()] @@ -79,83 +76,67 @@ def device_body(): # AIE-array data movement with object fifos # Input - objectfifo( + inOF_L3L2 = object_fifo( "inOF_L3L2", ShimTile, [ComputeTile2, MemTile], [2, 2, 7], - ofifo_line_bytes_ty, - [], - [], + line_bytes_ty, ) - objectfifo( + inOF_L2L1 = object_fifo( "inOF_L2L1", MemTile, - [ComputeTile5], + ComputeTile5, 7, - ofifo_line_bytes_ty, - [], - [], + line_bytes_ty, ) - objectfifo_link(["inOF_L3L2"], ["inOF_L2L1"]) + object_fifo_link(inOF_L3L2, inOF_L2L1) # Output - objectfifo( + outOF_L2L3 = object_fifo( "outOF_L2L3", MemTile, - [ShimTile], + ShimTile, 2, - ofifo_line_bytes_ty, - [], - [], + line_bytes_ty, ) - objectfifo( + outOF_L1L2 = object_fifo( "outOF_L1L2", ComputeTile5, - [MemTile], + MemTile, 2, - ofifo_line_bytes_ty, - [], - [], + line_bytes_ty, ) - objectfifo_link(["outOF_L1L2"], ["outOF_L2L3"]) + object_fifo_link(outOF_L1L2, outOF_L2L3) # Intermediate - objectfifo( + OF_2to3 = object_fifo( "OF_2to3", ComputeTile2, - [ComputeTile3], + ComputeTile3, 4, - ofifo_line_ty, - [], - [], + line_ty, ) - objectfifo( + OF_3to4 = object_fifo( "OF_3to4", ComputeTile3, - [ComputeTile4], + ComputeTile4, 2, - ofifo_line_ty, - [], - [], + line_ty, ) - objectfifo( + OF_4to5 = object_fifo( "OF_4to5", ComputeTile4, - [ComputeTile5], + ComputeTile5, 2, - ofifo_line_ty, - [], - [], + line_ty, ) - objectfifo( + OF_5to5 = object_fifo( "OF_5to5", ComputeTile5, - [ComputeTile5], + ComputeTile5, 1, - ofifo_line_bytes_ty, - [], - [], + line_bytes_ty, ) # Set up compute tiles @@ -165,17 +146,13 @@ def device_body(): def core_body(): for _ in for_(4294967295): # for _ in for_(36): - elem_in = acquire( - ObjectFifoPort.Consume, "inOF_L3L2", 1, line_bytes_ty - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_2to3", 1, line_ty - ).acquired_elem() + elem_in = inOF_L3L2.acquire(ObjectFifoPort.Consume, 1) + elem_out = OF_2to3.acquire(ObjectFifoPort.Produce, 1) - Call(rgba2gray_line, [elem_in, elem_out, arith.constant(lineWidth)]) + call(rgba2gray_line, [elem_in, elem_out, arith.constant(lineWidth)]) - objectfifo_release(ObjectFifoPort.Consume, "inOF_L3L2", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_2to3", 1) + inOF_L3L2.release(ObjectFifoPort.Consume, 1) + OF_2to3.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 3 @@ -197,13 +174,9 @@ def core_body(): for _ in for_(4294967295): # Preamble : Top Border - elems_in_pre = acquire( - ObjectFifoPort.Consume, "OF_2to3", 2, line_ty - ).acquired_elem() - elem_pre_out = acquire( - ObjectFifoPort.Produce, "OF_3to4", 1, line_ty - ).acquired_elem() - Call( + elems_in_pre = OF_2to3.acquire(ObjectFifoPort.Consume, 2) + elem_pre_out = OF_3to4.acquire(ObjectFifoPort.Produce, 1) + call( filter2d_line, [ elems_in_pre[0], @@ -214,17 +187,13 @@ def core_body(): kernel, ], ) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) + OF_3to4.release(ObjectFifoPort.Produce, 1) # Steady State : Middle for _ in for_(1, heightMinus1): - elems_in = acquire( - ObjectFifoPort.Consume, "OF_2to3", 3, line_ty - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_3to4", 1, line_ty - ).acquired_elem() - Call( + elems_in = OF_2to3.acquire(ObjectFifoPort.Consume, 3) + elem_out = OF_3to4.acquire(ObjectFifoPort.Produce, 1) + call( filter2d_line, [ elems_in[0], @@ -235,18 +204,14 @@ def core_body(): kernel, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_2to3", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) + OF_2to3.release(ObjectFifoPort.Consume, 1) + OF_3to4.release(ObjectFifoPort.Produce, 1) yield_([]) # Postamble : Bottom Border - elems_in_post = acquire( - ObjectFifoPort.Consume, "OF_2to3", 2, line_ty - ).acquired_elem() - elem_post_out = acquire( - ObjectFifoPort.Produce, "OF_3to4", 1, line_ty - ).acquired_elem() - Call( + elems_in_post = OF_2to3.acquire(ObjectFifoPort.Consume, 2) + elem_post_out = OF_3to4.acquire(ObjectFifoPort.Produce, 1) + call( filter2d_line, [ elems_in_post[0], @@ -257,8 +222,8 @@ def core_body(): kernel, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_2to3", 2) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) + OF_2to3.release(ObjectFifoPort.Consume, 2) + OF_3to4.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 4 @@ -269,14 +234,10 @@ def core_body(): v_typ = arith.constant(0, T.i8()) for _ in for_(4294967295): - elem_in = acquire( - ObjectFifoPort.Consume, "OF_3to4", 1, line_ty - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_4to5", 1, line_ty - ).acquired_elem() - - Call( + elem_in = OF_3to4.acquire(ObjectFifoPort.Consume, 1) + elem_out = OF_4to5.acquire(ObjectFifoPort.Produce, 1) + + call( threshold_line, [ elem_in, @@ -288,41 +249,31 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_3to4", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_4to5", 1) + OF_3to4.release(ObjectFifoPort.Consume, 1) + OF_4to5.release(ObjectFifoPort.Produce, 1) yield_([]) # Compute tile 5 @core(ComputeTile5, "combined_gray2rgba_addWeighted.a") def core_body(): for _ in for_(4294967295): - elem_in = acquire( - ObjectFifoPort.Consume, "OF_4to5", 1, line_ty - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_5to5", 1, line_bytes_ty - ).acquired_elem() - - Call(gray2rgba_line, [elem_in, elem_out, arith.constant(lineWidth)]) - - objectfifo_release(ObjectFifoPort.Consume, "OF_4to5", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_5to5", 1) - - elem_in1 = acquire( - ObjectFifoPort.Consume, "OF_5to5", 1, line_bytes_ty - ).acquired_elem() - elem_in2 = acquire( - ObjectFifoPort.Consume, "inOF_L2L1", 1, line_bytes_ty - ).acquired_elem() - elem_out2 = acquire( - ObjectFifoPort.Produce, "outOF_L1L2", 1, line_bytes_ty - ).acquired_elem() + elem_in = OF_4to5.acquire(ObjectFifoPort.Consume, 1) + elem_out = OF_5to5.acquire(ObjectFifoPort.Produce, 1) + + call(gray2rgba_line, [elem_in, elem_out, arith.constant(lineWidth)]) + + OF_4to5.release(ObjectFifoPort.Consume, 1) + OF_5to5.release(ObjectFifoPort.Produce, 1) + + elem_in1 = OF_5to5.acquire(ObjectFifoPort.Consume, 1) + elem_in2 = inOF_L2L1.acquire(ObjectFifoPort.Consume, 1) + elem_out2 = outOF_L1L2.acquire(ObjectFifoPort.Produce, 1) alpha = arith.constant(16384, T.i16()) beta = arith.constant(16384, T.i16()) gamma = arith.constant(0, T.i8()) - Call( + call( add_weighted_line, [ elem_in1, @@ -335,9 +286,9 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_5to5", 1) - objectfifo_release(ObjectFifoPort.Consume, "inOF_L2L1", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOF_L1L2", 1) + OF_5to5.release(ObjectFifoPort.Consume, 1) + inOF_L2L1.release(ObjectFifoPort.Consume, 1) + outOF_L1L2.release(ObjectFifoPort.Produce, 1) yield_([]) # To/from AIE-array data movement diff --git a/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py b/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py index 170d4c5ba0..5422f803d1 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py +++ b/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py @@ -33,7 +33,6 @@ def passThroughAIE2(): def device_body(): # define types line_ty = T.memref(lineWidthInBytes, T.ui8()) - ofifo_line_ty = TypeAttr.get(ObjectFifoType.get(line_ty)) # AIE Core Function declarations passThroughLine = external_func( @@ -45,11 +44,11 @@ def device_body(): ComputeTile2 = tile(0, 2) if enableTrace: - FlowOp(ComputeTile2, "Trace", 0, ShimTile, "DMA", 1) + flow(ComputeTile2, "Trace", 0, ShimTile, "DMA", 1) # AIE-array data movement with object fifos - objectfifo("in", ShimTile, [ComputeTile2], 2, ofifo_line_ty, [], []) - objectfifo("out", ComputeTile2, [ShimTile], 2, ofifo_line_ty, [], []) + of_in = object_fifo("in", ShimTile, ComputeTile2, 2, line_ty) + of_out = object_fifo("out", ComputeTile2, ShimTile, 2, line_ty) # Set up compute tiles @@ -58,15 +57,11 @@ def device_body(): def core_body(): for _ in for_(sys.maxsize): for _ in for_(height): - elemOut = acquire( - ObjectFifoPort.Produce, "out", 1, line_ty - ).acquired_elem() - elemIn = acquire( - ObjectFifoPort.Consume, "in", 1, line_ty - ).acquired_elem() - Call(passThroughLine, [elemIn, elemOut, width]) - objectfifo_release(ObjectFifoPort.Consume, "in", 1) - objectfifo_release(ObjectFifoPort.Produce, "out", 1) + elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) + elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) + call(passThroughLine, [elemIn, elemOut, width]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) yield_([]) yield_([]) diff --git a/test/python/aie_ops.py b/test/python/aie_ops.py index b0e60e069e..de7376ad76 100644 --- a/test/python/aie_ops.py +++ b/test/python/aie_ops.py @@ -7,19 +7,18 @@ from aie.dialects.aie import ( AIEDevice, - Buffer, Core, Device, - ExternalBuffer, MemOp, ObjectFifoPort, - ObjectFifoType, - acquire, + ObjectFifoSubviewType, + buffer, + external_buffer, bd_dim_layout, end, - objectfifo, - objectfifo_link, - objectfifo_release, + object_fifo, + objectfifo_acquire, + object_fifo_link, objectfifo_subview_access, tile, cascade_flow, @@ -84,11 +83,11 @@ def deviceOp(): @construct_and_print_module def bufferOp(): t = tile(col=0, row=3) - b = Buffer(tile=t, shape=(12,), datatype=T.i32()) - b = Buffer( - tile=t, - shape=(2, 2), - datatype=T.i32(), + b = buffer(t, (12,), T.i32()) + b = buffer( + t, + (2, 2), + T.i32(), initial_value=np.arange(2 * 2, dtype=np.int32).reshape(2, 2), ) @@ -97,7 +96,7 @@ def bufferOp(): # CHECK: %[[VAL_0:.*]] = aie.external_buffer : memref<12xi32> @construct_and_print_module def externalBufferOp(): - b = ExternalBuffer(shape=(12,), datatype=T.i32()) + b = external_buffer((12,), T.i32()) # CHECK-LABEL: objFifo @@ -111,12 +110,12 @@ def objFifo(): with InsertionPoint(bb): tile0 = tile(col=6, row=6) tile1 = tile(col=2, row=2) - objectfifo( + object_fifo( "of0", tile0, - [tile1], + tile1, 2, - TypeAttr.get(ObjectFifoType.get(T.memref(12, T.f16()))), + T.memref(12, T.f16()), [bd_dim_layout(size=1, stride=2)], [[bd_dim_layout(size=1, stride=2)]], ) @@ -138,25 +137,9 @@ def objFifoLink(): tile0 = tile(col=6, row=6) tile1 = tile(col=2, row=2) tile2 = tile(col=7, row=7) - objectfifo( - "of0", - tile0, - [tile1], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(12, T.f16()))), - [], - [], - ) - objectfifo( - "of1", - tile1, - [tile2], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(12, T.f16()))), - [], - [], - ) - objectfifo_link(["of0"], ["of1"]) + of0 = object_fifo("of0", tile0, tile1, 2, T.memref(12, T.f16())) + of1 = object_fifo("of1", tile1, tile2, 2, T.memref(12, T.f16())) + object_fifo_link(of0, of1) end() @@ -172,24 +155,11 @@ def objFifoAcquire(): with InsertionPoint(bb): tile0 = tile(col=6, row=6) tile1 = tile(col=2, row=2) - objectfifo( - "of0", - tile0, - [tile1], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(12, T.f16()))), - [], - [], - ) + of0 = object_fifo("of0", tile0, tile1, 2, T.memref(12, T.f16())) C = Core(tile1) bb = Block.create_at_start(C.body) with InsertionPoint(bb): - acq = acquire( - port=ObjectFifoPort.Consume, - of_name="of0", - num_elem=1, - datatype=T.memref(12, T.f16()), - ) + acq = of0.acquire(port=ObjectFifoPort.Consume, num_elem=1) end() @@ -206,23 +176,15 @@ def objFifoSubviewAccess(): with InsertionPoint(bb): tile0 = tile(col=6, row=6) tile1 = tile(col=2, row=2) - objectfifo( - "of0", - tile0, - [tile1], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(12, T.f16()))), - [], - [], - ) + of0 = object_fifo("of0", tile0, tile1, 2, T.memref(12, T.f16())) C = Core(tile1) bb = Block.create_at_start(C.body) with InsertionPoint(bb): - acq = acquire( - port=ObjectFifoPort.Consume, - of_name="of0", - num_elem=1, - datatype=T.memref(12, T.f16()), + acq = objectfifo_acquire( + ObjectFifoSubviewType.get(T.memref(12, T.f16())), + ObjectFifoPort.Consume, + "of0", + 1, ) subview = objectfifo_subview_access( T.memref(12, T.f16()), subview=acq, index=0 @@ -242,19 +204,11 @@ def objFifoRelease(): with InsertionPoint(bb): tile0 = tile(col=6, row=6) tile1 = tile(col=2, row=2) - objectfifo( - "of0", - tile0, - [tile1], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(12, T.f16()))), - [], - [], - ) + of0 = object_fifo("of0", tile0, tile1, 2, T.memref(12, T.f16())) C = Core(tile0) bb = Block.create_at_start(C.body) with InsertionPoint(bb): - acq = objectfifo_release(ObjectFifoPort.Produce, "of0", 1) + acq = of0.release(ObjectFifoPort.Produce, 1) end() diff --git a/test/python/code_region.py b/test/python/code_region.py index 1d9534440e..a45d495e99 100644 --- a/test/python/code_region.py +++ b/test/python/code_region.py @@ -6,16 +6,13 @@ import aie.extras.types as T from aie.dialects.aie import ( AIEDevice, - Call, + call, ObjectFifoPort, - ObjectFifoType, - acquire, core, device, external_func, - objectfifo, - objectfifo_link, - objectfifo_release, + object_fifo, + object_fifo_link, tile, ) from aie.dialects.scf import for_, yield_ @@ -58,32 +55,14 @@ def device_body(): M = tile(1, 2) N = tile(3, 3) - objectfifo( - "of0", - S, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.i32()))), - [], - [], - ) - objectfifo( - "of1", - M, - [N], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(8, 8, T.i32()))), - [], - [], - ) - objectfifo_link(["of0"], ["of1"]) + of0 = object_fifo("of0", S, M, 2, T.memref(256, T.i32())) + of1 = object_fifo("of1", M, N, 2, T.memref(8, 8, T.i32())) + object_fifo_link(of0, of1) @core(N, "test.o") def core_body(): for _ in range_(10): - elem0 = acquire( - ObjectFifoPort.Consume, "of1", 1, T.memref(8, 8, T.i32()) - ).acquired_elem() - res = Call("test_func", [elem0], [T.i32()]) - objectfifo_release(ObjectFifoPort.Consume, "of1", 1) + elem0 = of1.acquire(ObjectFifoPort.Consume, 1) + res = call("test_func", [elem0], [T.i32()]) + of1.release(ObjectFifoPort.Consume, 1) yield_([]) diff --git a/test/python/core_ext_kernel.py b/test/python/core_ext_kernel.py index 7cb14af817..0c28c96e11 100644 --- a/test/python/core_ext_kernel.py +++ b/test/python/core_ext_kernel.py @@ -6,16 +6,13 @@ import aie.extras.types as T from aie.dialects.aie import ( AIEDevice, - Call, + call, Core, Device, ObjectFifoPort, - ObjectFifoType, - acquire, external_func, - objectfifo, - objectfifo_link, - objectfifo_release, + object_fifo, + object_fifo_link, tile, end, ) @@ -65,34 +62,16 @@ def core_ext_kernel(): M = tile(1, 2) N = tile(3, 3) - objectfifo( - "of0", - S, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.i32()))), - [], - [], - ) - objectfifo( - "of1", - M, - [N], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(8, 8, T.i32()))), - [], - [], - ) - objectfifo_link(["of0"], ["of1"]) + of0 = object_fifo("of0", S, M, 2, T.memref(256, T.i32())) + of1 = object_fifo("of1", M, N, 2, T.memref(8, 8, T.i32())) + object_fifo_link(of0, of1) C = Core(N, "test.o") bb = Block.create_at_start(C.body) with InsertionPoint(bb): for _ in range_(10): - elem0 = acquire( - ObjectFifoPort.Consume, "of1", 1, T.memref(8, 8, T.i32()) - ).acquired_elem() - res = Call("test_func", [elem0, arith.constant(4)], [T.i32()]) - objectfifo_release(ObjectFifoPort.Consume, "of1", 1) + elem0 = of1.acquire(ObjectFifoPort.Consume, 1) + res = call("test_func", [elem0, arith.constant(4)], [T.i32()]) + of1.release(ObjectFifoPort.Consume, 1) yield_([]) end() diff --git a/test/python/ipu.py b/test/python/ipu.py index 321b0ad4e0..e2ad6959e8 100644 --- a/test/python/ipu.py +++ b/test/python/ipu.py @@ -11,19 +11,16 @@ import aie.extras.types as T from aie.dialects.aie import ( AIEDevice, - Call, + call, DMAChannelDir, LockAction, ObjectFifoPort, - ObjectFifoType, WireBundle, - acquire, core, device, external_func, - objectfifo, - objectfifo_link, - objectfifo_release, + object_fifo, + object_fifo_link, tile, ) from aie.dialects.aiex import ipu_sync, ipu_dma_memcpy_nd @@ -61,24 +58,8 @@ def device_body(): S = tile(0, 0) M = tile(0, 2) - objectfifo( - "in", - S, - [M], - buffer_depth, - TypeAttr.get(ObjectFifoType.get(T.memref(n, T.i32()))), - [], - [], - ) - objectfifo( - "out", - M, - [S], - buffer_depth, - TypeAttr.get(ObjectFifoType.get(T.memref(n, T.i32()))), - [], - [], - ) + of_in = object_fifo("in", S, M, buffer_depth, T.memref(n, T.i32())) + of_out = object_fifo("out", M, S, buffer_depth, T.memref(n, T.i32())) @core(M, "scale.o") def core_body(): @@ -86,15 +67,11 @@ def core_body(): for _ in range_(0xFFFFFFFF): # Number of sub-vector "tile" iterations for _ in range_(N_div_n): - elem_out = acquire( - ObjectFifoPort.Produce, "out", 1, T.memref(n, T.i32()) - ).acquired_elem() - elem_in = acquire( - ObjectFifoPort.Consume, "in", 1, T.memref(n, T.i32()) - ).acquired_elem() - Call(scale_int32, [elem_in, elem_out]) - objectfifo_release(ObjectFifoPort.Consume, "in", 1) - objectfifo_release(ObjectFifoPort.Produce, "out", 1) + elem_out = of_out.acquire(ObjectFifoPort.Produce, 1) + elem_in = of_in.acquire(ObjectFifoPort.Consume, 1) + call(scale_int32, [elem_in, elem_out]) + of_in.release(ObjectFifoPort.Consume, 1) + of_out.release(ObjectFifoPort.Produce, 1) yield_([]) yield_([]) @@ -171,62 +148,32 @@ def device_body(): S = tile(0, 0) M = tile(0, 2) - objectfifo( - "inA", - S, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(m, k, T.i16()))), - [], - [], - ) - objectfifo( - "inB", - S, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(k, n, T.i16()))), - [], - [], - ) - objectfifo( - "outC", - M, - [S], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(m, n, T.i16()))), - [], - [], - ) + of_inA = object_fifo("inA", S, M, 2, T.memref(m, k, T.i16())) + of_inB = object_fifo("inB", S, M, 2, T.memref(k, n, T.i16())) + of_outC = object_fifo("outC", M, S, 2, T.memref(m, n, T.i16())) @core(M, "mm.o") def core_body(): for _ in range_(0xFFFFFFFF): for _ in range_(tiles): - elem_out = acquire( - ObjectFifoPort.Produce, "outC", 1, T.memref(m, n, T.i16()) - ).acquired_elem() + elem_out = of_outC.acquire(ObjectFifoPort.Produce, 1) if vectorized: - Call(zero, [elem_out]) + call(zero, [elem_out]) else: - Call(zero_scalar, [elem_out]) + call(zero_scalar, [elem_out]) for _ in range_(K_div_k): - elem_in_a = acquire( - ObjectFifoPort.Consume, "inA", 1, T.memref(m, k, T.i16()) - ).acquired_elem() - elem_in_b = acquire( - ObjectFifoPort.Consume, "inB", 1, T.memref(k, n, T.i16()) - ).acquired_elem() + elem_in_a = of_inA.acquire(ObjectFifoPort.Consume, 1) + elem_in_b = of_inB.acquire(ObjectFifoPort.Consume, 1) if vectorized: - Call(matmul, [elem_in_a, elem_in_b, elem_out]) + call(matmul, [elem_in_a, elem_in_b, elem_out]) else: - Call(matmul_scalar, [elem_in_a, elem_in_b, elem_out]) - objectfifo_release(ObjectFifoPort.Consume, "inA", 1) - objectfifo_release(ObjectFifoPort.Consume, "inB", 1) + call(matmul_scalar, [elem_in_a, elem_in_b, elem_out]) + of_inA.release(ObjectFifoPort.Consume, 1) + of_inB.release(ObjectFifoPort.Consume, 1) yield_([]) - objectfifo_release(ObjectFifoPort.Produce, "outC", 1) + of_outC.release(ObjectFifoPort.Produce, 1) yield_([]) yield_([]) @@ -339,97 +286,31 @@ def device_body(): T4 = tile(0, 4) T5 = tile(0, 5) - objectfifo( - "inOF_L3L2", - S, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.ui8()))), - [], - [], - ) - objectfifo( - "inOF_L2L1", - M, - [T2, T5], - [2, 2, 7], - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.ui8()))), - [], - [], - ) - objectfifo_link(["inOF_L3L2"], ["inOF_L2L1"]) - - objectfifo( - "outOF_L2L3", - M, - [S], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.ui8()))), - [], - [], - ) - objectfifo( - "outOF_L1L2", - T5, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.ui8()))), - [], - [], - ) - objectfifo_link(["outOF_L1L2"], ["outOF_L2L3"]) - - objectfifo( - "OF_2to3", - T2, - [T3], - 4, - TypeAttr.get(ObjectFifoType.get(T.memref(64, T.ui8()))), - [], - [], - ) - objectfifo( - "OF_3to4", - T3, - [T4], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(64, T.ui8()))), - [], - [], - ) - objectfifo( - "OF_4to5", - T4, - [T5], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(64, T.ui8()))), - [], - [], - ) - objectfifo( - "OF_5to5", - T5, - [T5], - 1, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.ui8()))), - [], - [], + inOF_L3L2 = object_fifo("inOF_L3L2", S, M, 2, T.memref(256, T.ui8())) + inOF_L2L1 = object_fifo( + "inOF_L2L1", M, [T2, T5], [2, 2, 7], T.memref(256, T.ui8()) ) + object_fifo_link(inOF_L3L2, inOF_L2L1) + + outOF_L2L3 = object_fifo("outOF_L2L3", M, S, 2, T.memref(256, T.ui8())) + outOF_L1L2 = object_fifo("outOF_L1L2", T5, M, 2, T.memref(256, T.ui8())) + object_fifo_link(outOF_L1L2, outOF_L2L3) + + OF_2to3 = object_fifo("OF_2to3", T2, T3, 4, T.memref(64, T.ui8())) + OF_3to4 = object_fifo("OF_3to4", T3, T4, 2, T.memref(64, T.ui8())) + OF_4to5 = object_fifo("OF_4to5", T4, T5, 2, T.memref(64, T.ui8())) + OF_5to5 = object_fifo("OF_5to5", T5, T5, 1, T.memref(256, T.ui8())) @core(T2, "rgba2gray.cc.o") def core_body(): for _ in range_(36): - elem_in = acquire( - ObjectFifoPort.Consume, "inOF_L2L1", 1, T.memref(256, T.ui8()) - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_2to3", 1, T.memref(64, T.ui8()) - ).acquired_elem() + elem_in = inOF_L2L1.acquire(ObjectFifoPort.Consume, 1) + elem_out = OF_2to3.acquire(ObjectFifoPort.Produce, 1) - Call(rgba2gray_line, [elem_in, elem_out, arith.constant(64)]) + call(rgba2gray_line, [elem_in, elem_out, arith.constant(64)]) - objectfifo_release(ObjectFifoPort.Consume, "inOF_L2L1", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_2to3", 1) + inOF_L2L1.release(ObjectFifoPort.Consume, 1) + OF_2to3.release(ObjectFifoPort.Produce, 1) yield_([]) @core(T3, "filter2d.cc.o") @@ -449,13 +330,9 @@ def core_body(): memref.store(v0, kernel, [2, 2]) # Preamble : Top Border - elems_in_pre = acquire( - ObjectFifoPort.Consume, "OF_2to3", 2, T.memref(64, T.ui8()) - ).acquired_elem() - elem_pre_out = acquire( - ObjectFifoPort.Produce, "OF_3to4", 1, T.memref(64, T.ui8()) - ).acquired_elem() - Call( + elems_in_pre = OF_2to3.acquire(ObjectFifoPort.Consume, 2) + elem_pre_out = OF_3to4.acquire(ObjectFifoPort.Produce, 1) + call( filter2d_line, [ elems_in_pre[0], @@ -466,17 +343,13 @@ def core_body(): kernel, ], ) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) + OF_3to4.release(ObjectFifoPort.Produce, 1) # Steady State : Middle for _ in range_(1, 35): - elems_in = acquire( - ObjectFifoPort.Consume, "OF_2to3", 3, T.memref(64, T.ui8()) - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_3to4", 1, T.memref(64, T.ui8()) - ).acquired_elem() - Call( + elems_in = OF_2to3.acquire(ObjectFifoPort.Consume, 3) + elem_out = OF_3to4.acquire(ObjectFifoPort.Produce, 1) + call( filter2d_line, [ elems_in[0], @@ -487,18 +360,14 @@ def core_body(): kernel, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_2to3", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) + OF_2to3.release(ObjectFifoPort.Consume, 1) + OF_3to4.release(ObjectFifoPort.Produce, 1) yield_([]) # Postamble : Bottom Border - elems_in_post = acquire( - ObjectFifoPort.Consume, "OF_2to3", 2, T.memref(64, T.ui8()) - ).acquired_elem() - elem_post_out = acquire( - ObjectFifoPort.Produce, "OF_3to4", 1, T.memref(64, T.ui8()) - ).acquired_elem() - Call( + elems_in_post = OF_2to3.acquire(ObjectFifoPort.Consume, 2) + elem_post_out = OF_3to4.acquire(ObjectFifoPort.Produce, 1) + call( filter2d_line, [ elems_in_post[0], @@ -509,8 +378,8 @@ def core_body(): kernel, ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_2to3", 2) - objectfifo_release(ObjectFifoPort.Produce, "OF_3to4", 1) + OF_2to3.release(ObjectFifoPort.Consume, 2) + OF_3to4.release(ObjectFifoPort.Produce, 1) @core(T4, "threshold.cc.o") def core_body(): @@ -519,52 +388,38 @@ def core_body(): v_typ = arith.constant(0, T.i8()) for _ in range_(36): - elem_in = acquire( - ObjectFifoPort.Consume, "OF_3to4", 1, T.memref(64, T.ui8()) - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_4to5", 1, T.memref(64, T.ui8()) - ).acquired_elem() - - Call( + elem_in = OF_3to4.acquire(ObjectFifoPort.Consume, 1) + elem_out = OF_4to5.acquire(ObjectFifoPort.Produce, 1) + + call( threshold_line, [elem_in, elem_out, arith.constant(64), v_thr, v_max, v_typ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_3to4", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_4to5", 1) + OF_3to4.release(ObjectFifoPort.Consume, 1) + OF_4to5.release(ObjectFifoPort.Produce, 1) yield_([]) @core(T5, "combined_gray2rgba_addWeighted.a") def core_body(): for _ in range_(36): - elem_in = acquire( - ObjectFifoPort.Consume, "OF_4to5", 1, T.memref(64, T.ui8()) - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "OF_5to5", 1, T.memref(256, T.ui8()) - ).acquired_elem() - - Call(gray2rgba_line, [elem_in, elem_out, arith.constant(64)]) - - objectfifo_release(ObjectFifoPort.Consume, "OF_4to5", 1) - objectfifo_release(ObjectFifoPort.Produce, "OF_5to5", 1) - - elem_in1 = acquire( - ObjectFifoPort.Consume, "OF_5to5", 1, T.memref(256, T.ui8()) - ).acquired_elem() - elem_in2 = acquire( - ObjectFifoPort.Consume, "inOF_L2L1", 1, T.memref(256, T.ui8()) - ).acquired_elem() - elem_out2 = acquire( - ObjectFifoPort.Produce, "outOF_L1L2", 1, T.memref(256, T.ui8()) - ).acquired_elem() + elem_in = OF_4to5.acquire(ObjectFifoPort.Consume, 1) + elem_out = OF_5to5.acquire(ObjectFifoPort.Produce, 1) + + call(gray2rgba_line, [elem_in, elem_out, arith.constant(64)]) + + OF_4to5.release(ObjectFifoPort.Consume, 1) + OF_5to5.release(ObjectFifoPort.Produce, 1) + + elem_in1 = OF_5to5.acquire(ObjectFifoPort.Consume, 1) + elem_in2 = inOF_L2L1.acquire(ObjectFifoPort.Consume, 1) + elem_out2 = outOF_L1L2.acquire(ObjectFifoPort.Produce, 1) alpha = arith.constant(16384, T.i16()) beta = arith.constant(16384, T.i16()) gamma = arith.constant(0, T.i8()) - Call( + call( add_weighted_line, [ elem_in1, @@ -577,9 +432,9 @@ def core_body(): ], ) - objectfifo_release(ObjectFifoPort.Consume, "OF_5to5", 1) - objectfifo_release(ObjectFifoPort.Consume, "inOF_L2L1", 1) - objectfifo_release(ObjectFifoPort.Produce, "outOF_L1L2", 1) + OF_5to5.release(ObjectFifoPort.Consume, 1) + inOF_L2L1.release(ObjectFifoPort.Consume, 1) + outOF_L1L2.release(ObjectFifoPort.Produce, 1) yield_([]) @FuncOp.from_py_func( @@ -614,62 +469,27 @@ def device_body(): mem_tile = tile(0, 1) compute_tile2 = tile(0, 2) - objectfifo( - "in0", - shim_tile, - [mem_tile], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(16, T.i32()))), - [], - [], - ) - objectfifo( - "in1", - mem_tile, - [compute_tile2], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(8, T.i32()))), - [], - [], - ) - objectfifo_link(["in0"], ["in1"]) - objectfifo( - "out0", - mem_tile, - [shim_tile], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(8, T.i32()))), - [], - [], - ) - objectfifo( - "out1", - compute_tile2, - [mem_tile], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(16, T.i32()))), - [], - [], - ) - objectfifo_link(["out1"], ["out0"]) + of_in0 = object_fifo("in0", shim_tile, mem_tile, 2, T.memref(16, T.i32())) + of_in1 = object_fifo("in1", mem_tile, compute_tile2, 2, T.memref(8, T.i32())) + object_fifo_link(of_in0, of_in1) + + of_out0 = object_fifo("out0", mem_tile, shim_tile, 2, T.memref(8, T.i32())) + of_out1 = object_fifo("out1", compute_tile2, mem_tile, 2, T.memref(16, T.i32())) + object_fifo_link(of_out1, of_out0) @core(compute_tile2) def core_body(): # Effective while(1) for _ in range_(8): - elem_in = acquire( - ObjectFifoPort.Consume, "in1", 1, T.memref(8, T.i32()) - ).acquired_elem() - elem_out = acquire( - ObjectFifoPort.Produce, "out1", 1, T.memref(16, T.i32()) - ).acquired_elem() + elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1) + elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1) for i in range_(8): v0 = memref.load(elem_in, [i]) v1 = arith.addi(v0, arith.constant(1, T.i32())) memref.store(v1, elem_out, [i]) yield_([]) - objectfifo_release(ObjectFifoPort.Consume, "in1", 1) - objectfifo_release(ObjectFifoPort.Produce, "out1", 1) + of_in1.release(ObjectFifoPort.Consume, 1) + of_out1.release(ObjectFifoPort.Produce, 1) yield_([]) @FuncOp.from_py_func( diff --git a/test/python/objFifo.py b/test/python/objFifo.py index 6b3990d14b..6bf5baaee1 100644 --- a/test/python/objFifo.py +++ b/test/python/objFifo.py @@ -7,10 +7,7 @@ from aie.dialects.aie import ( AIEDevice, ObjectFifoPort, - ObjectFifoType, - acquire, - objectfifo, - objectfifo_release, + object_fifo, tile, Device, Core, @@ -46,22 +43,12 @@ def objFifo_example(): S = tile(0, 2) T_ = tile(1, 2) - objectfifo( - "of0", - S, - [T_], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.i32()))), - [], - [], - ) + of0 = object_fifo("of0", S, T_, 2, T.memref(256, T.i32())) C = Core(T_) bb = Block.create_at_start(C.body) with InsertionPoint(bb): - elem0 = acquire( - ObjectFifoPort.Consume, "of0", 1, T.memref(256, T.i32()) - ).acquired_elem() + elem0 = of0.acquire(ObjectFifoPort.Consume, 1) memref.store(arith.constant(10), elem0.result, [0]) - objectfifo_release(ObjectFifoPort.Consume, "of0", 1) + of0.release(ObjectFifoPort.Consume, 1) end() diff --git a/test/python/objFifo_link.py b/test/python/objFifo_link.py index 816bb55dd0..c674411364 100644 --- a/test/python/objFifo_link.py +++ b/test/python/objFifo_link.py @@ -8,8 +8,8 @@ AIEDevice, ObjectFifoType, bd_dim_layout, - objectfifo, - objectfifo_link, + object_fifo, + object_fifo_link, tile, Device, ) @@ -40,32 +40,16 @@ def link_example(): T0 = tile(2, 2) T1 = tile(2, 3) - objectfifo( - "of0", - S, - [M], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.i32()))), - [], - [], - ) - objectfifo( - "of1", - M, - [T0, T1], - 2, - TypeAttr.get(ObjectFifoType.get(T.memref(64, T.i32()))), - [], - [], - ) - objectfifo_link(["of0"], ["of1"]) + of0 = object_fifo("of0", S, M, 2, T.memref(256, T.i32())) + of1 = object_fifo("of1", M, [T0, T1], 2, T.memref(64, T.i32())) + object_fifo_link(of0, of1) - objectfifo( + object_fifo( "of2", M, [T0, T1], [2, 2, 7], - TypeAttr.get(ObjectFifoType.get(T.memref(256, T.ui8()))), + T.memref(256, T.ui8()), [bd_dim_layout(size=1, stride=2)], [[bd_dim_layout(size=1, stride=2)], [bd_dim_layout(size=1, stride=2)]], ) diff --git a/test/python/simple_with_bindings.py b/test/python/simple_with_bindings.py index 92ded8c822..c021fa5002 100644 --- a/test/python/simple_with_bindings.py +++ b/test/python/simple_with_bindings.py @@ -10,7 +10,7 @@ Device, Core, end, - Buffer, + buffer, ) from aie.extras.dialects.ext import memref, arith from aie.ir import InsertionPoint, Block @@ -39,7 +39,7 @@ def simple_with_bindings_example(): dev_block = Block.create_at_start(dev.body_region) with InsertionPoint(dev_block): tile_a = tile(1, 4) - buff = Buffer(tile=tile_a, shape=(256,), datatype=T.i32()) + buff = buffer(tile=tile_a, shape=(256,), dtype=T.i32()) C = Core(tile_a) bb = Block.create_at_start(C.body)