Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable NPU2 for All Programming Examples #1986

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/buildAndTestRyzenAI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ jobs:
pushd build

# -j here to reduce the number of parallel chess jobs.
# -j6 for 32GB RAM, -j12 for 64GB RAM
# -j4 for 32GB RAM, -j12 for 64GB RAM
if [ x"${{ matrix.runner_type }}" == x"amdhx370" ]; then
LIT_OPTS="-j6 $LIT_OPTS"
LIT_OPTS="-j4 $LIT_OPTS"
else
LIT_OPTS="-j12 $LIT_OPTS"
fi
Expand All @@ -129,7 +129,7 @@ jobs:
strategy:
fail-fast: false
matrix:
runner_type: [ amd7940hs ]
runner_type: [ amd7940hs, amdhx370 ]
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -166,9 +166,9 @@ jobs:
pushd build

# -j here to reduce the number of parallel chess jobs.
# -j6 for 32GB RAM, -j12 for 64GB RAM
# -j4 for 32GB RAM, -j12 for 64GB RAM
if [ x"${{ matrix.runner_type }}" == x"amdhx370" ]; then
LIT_OPTS="-j6 $LIT_OPTS"
LIT_OPTS="-j4 $LIT_OPTS"
else
LIT_OPTS="-j12 $LIT_OPTS"
fi
Expand Down
34 changes: 31 additions & 3 deletions programming_examples/ml/softmax/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ include ${srcdir}/../../makefile-common

VPATH := ${srcdir}/../../../aie_kernels/aie2

# Device selector tested by the ifeq blocks in the rules below ("npu" or
# "npu2"); ?= lets the command line or environment override the default.
device ?= npu
# Base name of the host executable (${targetname}.exe).
targetname = softmax
# Trace-size argument handed to the design script for build/aie_trace.mlir.
trace_size = 8192

Expand All @@ -31,40 +32,67 @@ build/dut.cc: ${srcdir}/bf16_softmax.mlir
cd ${@D} && aie-opt $< -affine-super-vectorize="virtual-vector-size=16 test-fastest-varying=0 vectorize-reductions=true" --convert-vector-to-aievec="aie-target=aie2" -lower-affine | aie-translate -aie2=true --aievec-to-cpp -o ${@F}

# Compile the generated build/dut.cc into build/dut.o with the toolchain that
# matches ${device}: Peano clang++ for "npu", xchesscc_wrapper for "npu2".
# Any other device value aborts the build; a plain `echo` would let the recipe
# succeed without ever producing the object file.
build/dut.o: build/dut.cc
ifeq ($(device),npu)
	cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang++ ${PEANOWRAP2_FLAGS} -I../../../../aie_runtime_lib/AIE2 -c ${<F} -o ${@F}
else ifeq ($(device),npu2)
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2P_FLAGS} -I../../../../aie_runtime_lib/AIE2P -c ${<F} -o ${@F}
else
	$(error Device type '$(device)' not supported)
endif

# Compile the runtime library's lut_based_ops.cpp into build/lut_based_ops.o
# with the toolchain that matches ${device}. An unsupported device aborts the
# build instead of echoing a message and reporting success.
# NOTE(review): the prerequisite is taken from aie_runtime_lib/AIE2 even when
# device=npu2 -- confirm whether the AIE2P copy should be used in that case.
build/lut_based_ops.o: ../../../aie_runtime_lib/AIE2/lut_based_ops.cpp
	mkdir -p ${@D}
ifeq ($(device),npu)
	cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang++ ${PEANOWRAP2_FLAGS} -I. -c $(<:%=../%) -o ${@F}
else ifeq ($(device),npu2)
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2P_FLAGS} -I. -c $(<:%=../%) -o ${@F}
else
	$(error Device type '$(device)' not supported)
endif

# Compile the bf16 softmax kernel into build/softmax.o with the toolchain that
# matches ${device} (Peano clang++ for "npu", xchesscc_wrapper for "npu2").
# An unsupported device aborts the build; previously the recipe only echoed a
# message and succeeded, so the failure surfaced later in the archive step.
build/softmax.o: bf16_softmax.cc
	mkdir -p ${@D}
ifeq ($(device),npu)
	cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang++ ${PEANOWRAP2_FLAGS} -I. -I../../../../aie_runtime_lib/AIE2 -c $< -o ${@F}
else ifeq ($(device),npu2)
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2P_FLAGS} -I. -I../../../../aie_runtime_lib/AIE2P -c $< -o ${@F}
else
	$(error Device type '$(device)' not supported)
endif

# Archive the compiled softmax kernel object into a static library.
# $+ lists every prerequisite in order, keeping duplicates.
build/kernels.a: build/softmax.o
	ar rvs $@ $+

# Generate the MLIR design for the selected device by running the design
# script with ${device} as its first argument.
# The output is written to a temporary file and renamed on success, so a
# failing script never leaves a partial build/aie.mlir that looks up to date.
build/aie.mlir: ${srcdir}/${aie_py_src}
	mkdir -p ${@D}
	python3 $< ${device} > $@.tmp && mv -f $@.tmp $@

# Generate the trace-enabled MLIR design: the design script receives the
# device name followed by ${trace_size}.
# The output is written to a temporary file and renamed on success, so a
# failing script never leaves a partial build/aie_trace.mlir behind.
build/aie_trace.mlir: ${srcdir}/${aie_py_src}
	mkdir -p ${@D}
	python3 $< ${device} ${trace_size} > $@.tmp && mv -f $@.tmp $@


# Run aiecc.py inside the build directory to produce the xclbin and insts.txt.
# Only device=npu adds the Peano flags (--no-xchesscc --no-xbridge --peano);
# every other device value uses aiecc.py's default toolchain options.
build/final.xclbin: build/aie.mlir build/kernels.a
	mkdir -p $(@D)
ifneq ($(device),npu)
	cd $(@D) && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=$(@F) \
		--aie-generate-npu --npu-insts-name=insts.txt $(addprefix ../,$<)
else
	cd $(@D) && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=$(@F) \
		--no-xchesscc --no-xbridge --peano $(PEANO_INSTALL_DIR) \
		--aie-generate-npu --npu-insts-name=insts.txt $(addprefix ../,$<)
endif

# Trace variant of the xclbin build: same aiecc.py invocation, fed with
# build/aie_trace.mlir. Only device=npu adds the Peano flags.
build/final_trace.xclbin: build/aie_trace.mlir build/kernels.a
	mkdir -p $(@D)
ifneq ($(device),npu)
	cd $(@D) && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=$(@F) \
		--aie-generate-npu --npu-insts-name=insts.txt $(addprefix ../,$<)
else
	cd $(@D) && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=$(@F) \
		--no-xchesscc --no-xbridge --peano $(PEANO_INSTALL_DIR) \
		--aie-generate-npu --npu-insts-name=insts.txt $(addprefix ../,$<)
endif

${targetname}.exe: ${srcdir}/test.cpp
rm -rf _build
Expand Down
17 changes: 12 additions & 5 deletions programming_examples/ml/softmax/softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker
from aie.iron.placers import SequentialPlacer
from aie.iron.device import NPU1Col1
from aie.iron.device import NPU1Col1, NPU2
from aie.iron.controlflow import range_


def vector_softmax(trace_size):
def vector_softmax(dev, trace_size):
N = 262144 # *1024

# Tile sizes
Expand Down Expand Up @@ -88,13 +88,20 @@ def core_fn(of_in, of_out, softmax_kernel):
rt.drain(outC.cons(), C, wait=True)

# Place components (assign them resources on the device) and generate an MLIR module
return Program(NPU1Col1(), rt).resolve_program(SequentialPlacer())
return Program(dev, rt).resolve_program(SequentialPlacer())


# Command line: <script> <device: npu | npu2> [trace_size]
if len(sys.argv) < 2:
    sys.exit("[ERROR] Usage: {} <npu|npu2> [trace_size]".format(sys.argv[0]))

# Select the target device. This is validated outside the integer-parsing
# try block so an unknown device is reported as such (using argv[1], the
# device argument) and the script never continues with `dev` undefined.
device_name = str(sys.argv[1])
if device_name == "npu":
    dev = NPU1Col1()
elif device_name == "npu2":
    dev = NPU2()
else:
    sys.exit("[ERROR] Device name {} is unknown".format(device_name))

# The trace size is optional and defaults to 0 when it is not supplied.
# sys.exit writes the message to stderr, so it cannot end up in the MLIR
# that callers capture from stdout.
try:
    trace_size = 0 if (len(sys.argv) != 3) else int(sys.argv[2])
except ValueError:
    sys.exit("Argument is not an integer")

module = vector_softmax(dev, trace_size)
print(module)
15 changes: 11 additions & 4 deletions programming_examples/ml/softmax/softmax_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import aie.utils.trace as trace_utils


def vector_softmax(trace_size):
def vector_softmax(dev, trace_size):

word_size_in = 2
N = 262144 # *1024
Expand All @@ -31,7 +31,7 @@ def vector_softmax(trace_size):
tiles = N_div_n // n_cores
buffer_depth = 2

@device(AIEDevice.npu1_1col)
@device(dev)
def device_body():
tile_ty = np.ndarray[(n,), np.dtype[bfloat16]]

Expand Down Expand Up @@ -134,12 +134,19 @@ def sequence(A, C):


# Command line: <script> <device: npu | npu2> [trace_size]
if len(sys.argv) < 2:
    sys.exit("[ERROR] Usage: {} <npu|npu2> [trace_size]".format(sys.argv[0]))

# Select the target device. This is validated outside the integer-parsing
# try block so an unknown device is reported as such (using argv[1], the
# device argument) and the script never continues with `dev` undefined.
device_name = str(sys.argv[1])
if device_name == "npu":
    dev = AIEDevice.npu1_1col
elif device_name == "npu2":
    dev = AIEDevice.npu2
else:
    sys.exit("[ERROR] Device name {} is unknown".format(device_name))

# The trace size is optional and defaults to 0 when it is not supplied.
# sys.exit writes the message to stderr, so it cannot end up in the MLIR
# that callers capture from stdout.
try:
    trace_size = 0 if (len(sys.argv) != 3) else int(sys.argv[2])
except ValueError:
    sys.exit("Argument is not an integer")

with mlir_mod_ctx() as ctx:
vector_softmax(trace_size)
vector_softmax(dev, trace_size)
res = ctx.module.operation.verify()
if res == True:
print(ctx.module)
Expand Down
14 changes: 13 additions & 1 deletion programming_examples/vision/color_detect/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ include ${srcdir}/../../makefile-common

VPATH := ${srcdir}/../../../aie_kernels/aie2

# Device selector tested by the ifeq blocks in the rules below ("npu" or
# "npu2"); ?= lets the command line or environment override the default.
device ?= npu
# Frame dimensions passed to the design script; the width is also part of the
# generated .mlir and .xclbin file names.
COLORDETECT_WIDTH = 1920
COLORDETECT_HEIGHT = 1080

Expand All @@ -36,21 +37,32 @@ mlir: build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir

# Compile one kernel source (%.cc) into build/%.cc.o with -DBIT_WIDTH=8,
# using the toolchain that matches ${device}. An unsupported device aborts
# the build; a plain `echo` would let the recipe succeed without producing
# the object file.
build/%.cc.o: %.cc
	mkdir -p ${@D}
ifeq ($(device),npu)
	cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang++ ${PEANOWRAP2_FLAGS} -DBIT_WIDTH=8 -c $< -o ${@F}
else ifeq ($(device),npu2)
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2P_FLAGS} -DBIT_WIDTH=8 -c $< -o ${@F}
else
	$(error Device type '$(device)' not supported)
endif

# Bundle the bitwiseOR, gray2rgba and bitwiseAND kernel objects (the three
# prerequisites, in order) into one static archive.
build/combined_bitwiseOR_gray2rgba_bitwiseAND.a: build/bitwiseOR.cc.o build/gray2rgba.cc.o build/bitwiseAND.cc.o
	mkdir -p $(@D)
	ar rvs $@ $(wordlist 1,3,$^)

# Generate the MLIR design for the selected device and frame size: the design
# script receives ${device}, then the width and height.
# The output is written to a temporary file and renamed on success, so a
# failing script never leaves a partial .mlir that looks up to date.
build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir: ${srcdir}/${aie_py_src}
	mkdir -p ${@D}
	python3 $< ${device} ${COLORDETECT_WIDTH} ${COLORDETECT_HEIGHT} > $@.tmp && mv -f $@.tmp $@

# Run aiecc.py inside the build directory to produce the xclbin and insts.txt
# from the generated MLIR (the first prerequisite). Only device=npu adds
# --no-xchesscc --no-xbridge; every other device value omits them.
build/final_${COLORDETECT_WIDTH}.xclbin: build/aie2_lineBased_8b_${COLORDETECT_WIDTH}.mlir build/rgba2hue.cc.o build/threshold.cc.o build/combined_bitwiseOR_gray2rgba_bitwiseAND.a
	mkdir -p $(@D)
ifneq ($(device),npu)
	cd $(@D) && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --alloc-scheme=basic-sequential \
		--xclbin-name=$(@F) --npu-insts-name=insts.txt $(addprefix ../,$<)
else
	cd $(@D) && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host --alloc-scheme=basic-sequential \
		--no-xchesscc --no-xbridge \
		--xclbin-name=$(@F) --npu-insts-name=insts.txt $(addprefix ../,$<)
endif

${targetname}.exe: ${srcdir}/test.cpp
mkdir -p ${@D}
Expand Down
39 changes: 22 additions & 17 deletions programming_examples/vision/color_detect/color_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,15 @@

from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker
from aie.iron.placers import SequentialPlacer
from aie.iron.device import NPU1Col1
from aie.iron.device import NPU1Col1, NPU2

width = 64
height = 36
if len(sys.argv) == 3:
width = int(sys.argv[1])
height = int(sys.argv[2])

lineWidth = width
lineWidthInBytes = width * 4
tensorSize = width * height * 4 # 4 channels
def color_detect(dev, width, height):
lineWidth = width
lineWidthInBytes = width * 4
tensorSize = width * height * 4 # 4 channels

traceSize = 1024


def color_detect():
traceSize = 1024

# Define types
line_bytes_ty = np.ndarray[(lineWidthInBytes,), np.dtype[np.uint8]]
Expand Down Expand Up @@ -212,8 +205,20 @@ def or_gray2rgba_and_fn(
rt.drain(outOF_L2L3.cons(), O, wait=True)

# Place components (assign them resources on the device) and generate an MLIR module
return Program(NPU1Col1(), rt).resolve_program(SequentialPlacer())


module = color_detect()
return Program(dev, rt).resolve_program(SequentialPlacer())


# Command line: <script> <device: npu | npu2> [width height]
if len(sys.argv) < 2:
    sys.exit("[ERROR] Usage: {} <npu|npu2> [width height]".format(sys.argv[0]))

# Select the target device. This is validated outside the integer-parsing
# try block so an unknown device is reported as such and the script never
# continues with `dev` undefined.
device_name = str(sys.argv[1])
if device_name == "npu":
    dev = NPU1Col1()
elif device_name == "npu2":
    dev = NPU2()
else:
    sys.exit("[ERROR] Device name {} is unknown".format(device_name))

# Width and height are only read when both are supplied; otherwise fall back
# to the 64x36 defaults this script used before the device argument existed.
# sys.exit writes the message to stderr, so it cannot end up in the MLIR
# that callers capture from stdout.
try:
    if len(sys.argv) == 4:
        width = int(sys.argv[2])
        height = int(sys.argv[3])
    else:
        width = 64
        height = 36
except ValueError:
    sys.exit("Argument has inappropriate value")

module = color_detect(dev, width, height)
print(module)
Loading
Loading