Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backend][LLVM] Runtime support for any bitwidth integer numpy input #493

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 3 additions & 59 deletions heterocl/ast/ir_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

# Import MLIR dialects
# Naming rule: import dialect as dialect_d
import numpy as np

from hcl_mlir.dialects import (
func as func_d,
hcl as hcl_d,
Expand Down Expand Up @@ -52,7 +50,7 @@

from . import ast
from ..context import get_context, get_location
from ..utils import hcl_dtype_to_mlir, get_extra_type_hints
from ..utils import hcl_dtype_to_mlir, get_extra_type_hints, make_anywidth_numpy_array
from .. import types as htypes
from . import build_cleaner

Expand Down Expand Up @@ -1408,63 +1406,9 @@ def build_bit_reverse_op(self, op: ast.BitReverseOp, ip):
def build_constant_tensor_op(self, op: ast.ConstantTensorOp, ip):
loc = Location.file(op.loc.filename, op.loc.lineno, 0)
dtype = hcl_dtype_to_mlir(op.dtype, signless=True)
shape = op.values.shape
if isinstance(op.dtype, (htypes.Int, htypes.UInt)):
# The following code has several steps to convert the numpy array to have
# the correct data type in order to create an MLIR constant tensor.
# Since MLIR-NumPy Python interface only supports byte-addressable data types,
# we need to change the data type of the array to have the minimum number of bytes
# that can represent the target bitwidth.
# e.g., hcl.const_tensor(arr, dtype=hcl.Int(20)) (6*6 array)
# which requires 20 bits (3 bytes) to represent each element
# declaration: 6*6*i20
# numpy input: 6*6*i64
# 1. Decompose the original i32 or i64 array into a structured array of uint8
# -> decompose: 6*6*8*i8
if op.dtype.bits == 1:
val = op.values
array = np.packbits(val, axis=None, bitorder="little")
value_attr = DenseElementsAttr.get(array, shape=val.shape, type=dtype)
else:
# Here we construct a customized NumPy dtype, "f0", "f1", "f2", etc.
# are the field names, and the entire data type is `op.values.dtype`.
# This can be viewed as a `union` type in C/C++.
# Please refer to the documentation for more details:
# https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types
decomposed_np_dtype = np.dtype(
(
op.values.dtype,
{
f"f{i}": (np.uint8, i)
for i in range(op.values.dtype.itemsize)
},
)
)
val = op.values.view(decomposed_np_dtype)
# 2. Compose the uint8 array into a structured array of target bitwidth
# This is done by taking the first several bytes of the uint8 array
# "u1" means one unsigned byte, and "i1" means one signed byte
n_bytes = int(np.ceil(dtype.width / 8))
new_dtype = np.dtype(
{
"names": [f"f{i}" for i in range(n_bytes)],
"formats": (["i1"] if isinstance(dtype, htypes.Int) else ["u1"])
+ ["u1"] * (n_bytes - 1),
"offsets": list(range(n_bytes)),
"itemize": n_bytes,
}
)
# -> compose: 6*6*3*i8
val = np.stack([val[f"f{i}"] for i in range(n_bytes)], axis=-1)
# -> flatten: 108*i8
val = val.flatten()
# -> view: 36*i24
val = val.view(np.dtype(new_dtype))
# -> reshape: 6*6*i24
val = val.reshape(shape)
# Pass in the numpy array to get the MLIR attribute
# -> result: 6*6*i20
value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype)
val = make_anywidth_numpy_array(op.values, op.dtype.bits)
value_attr = DenseElementsAttr.get(val, shape=op.values.shape, type=dtype)
else:
val = op.values
value_attr = DenseElementsAttr.get(val)
Expand Down
3 changes: 1 addition & 2 deletions heterocl/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,13 +337,12 @@ def attach_llvm_attrs(module):
hcl_d.lower_composite_type(module)
hcl_d.lower_fixed_to_int(module)
hcl_d.lower_print_ops(module)
hcl_d.lower_anywidth_int(module)
# hcl_d.lower_anywidth_int(module)
# Note: lower_anywidth_int should precede
# move_return_to_input, because it uses input/output
# type hints.
hcl_d.move_return_to_input(module)
hcl_d.lower_bit_ops(module)
# print(module)
hcl_d.legalize_cast(module)
hcl_d.remove_stride_map(module)
pipeline = "lower-affine,func.func(buffer-loop-hoisting)"
Expand Down
2 changes: 1 addition & 1 deletion heterocl/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __call__(self, *argv):
argv[len(op.arguments) + i].np_array = np.pad(
argv[len(op.arguments) + i].np_array, pad_shape
)
execute_llvm_backend(self.src, self.name, self.return_num, *argv)
execute_llvm_backend(self.src, self.name, *argv)
for res, shape in original_results:
slicing = []
for s in shape:
Expand Down
43 changes: 23 additions & 20 deletions heterocl/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,20 @@
import subprocess
import ctypes
import time
import numpy as np
import warnings

from hcl_mlir import runtime as rt
from .report import parse_xml

# Suppress the RuntimeWarning that NumPy emits when a ctypes array is used as a
# NumPy array (PEP 3118 format string / itemsize mismatch). This is a known Python bug, see:
# https://stackoverflow.com/questions/4964101/pep-3118-warning-when-using-ctypes-array-as-numpy-array
warnings.filterwarnings(
"ignore",
category=RuntimeWarning,
message="A builtin ctypes object gave a PEP3118 format string that does not match its itemsize*",
)


def run_process(cmd, pattern=None):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
Expand Down Expand Up @@ -111,32 +120,26 @@ def execute_fpga_backend(target, shell=True):
raise RuntimeError("Not implemented")


def execute_llvm_backend(execution_engine, name, return_num, *argv):
def execute_llvm_backend(execution_engine, name, *argv):
"""
- execution_engine: mlir.ExecutionEngine object, created in hcl.build
- name: str, device top-level function name
- return_num: int, the number of return values
- argv: list-like object, a list of input and output variables
Execute the LLVM backend. Assumes that all return arguments have already
been moved to the input arguments.
----------
execution_engine: mlir.ExecutionEngine
JIT object, created in hcl.build
name: str
device top-level function name
argv: list-like object
a list of input and output variables
"""
if not isinstance(argv, list):
argv = list(argv)

# Unwrap hcl Array to get numpy arrays
argv_np = [arg.unwrap() for arg in argv]
# Extract output arrays
return_args = argv_np[-return_num:]
# Convert output variables from numpy arrays to memref pointers
return_pointers = []
for arg in return_args:
memref = rt.get_ranked_memref_descriptor(arg)
return_pointers.append(ctypes.pointer(ctypes.pointer(memref)))
# Convert input variables from numpy arrays to memref pointers
arg_pointers = []
for arg in argv_np[0:-return_num]:
for arg in argv_np:
memref = rt.get_ranked_memref_descriptor(arg)
arg_pointers.append(ctypes.pointer(ctypes.pointer(memref)))
# Invoke device top-level function
execution_engine.invoke(name, *return_pointers, *arg_pointers)
# Copy output arrays back
for i, return_p in enumerate(return_pointers):
out_array = rt.ranked_memref_to_numpy(return_p[0])
np.copyto(argv[-(len(return_args) - i)].np_array, out_array)
execution_engine.invoke(name, *arg_pointers)
Loading