From c64cfaec49afaf748f0343f89654ea85e98b6715 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Tue, 7 Mar 2023 02:07:00 -0500 Subject: [PATCH 01/15] [Util] Move make_anywidth_numpy_array to utils --- heterocl/ast/ir_builder.py | 60 +++---------------------------- heterocl/utils.py | 72 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 56 deletions(-) diff --git a/heterocl/ast/ir_builder.py b/heterocl/ast/ir_builder.py index 2abbca30..b43e24c5 100644 --- a/heterocl/ast/ir_builder.py +++ b/heterocl/ast/ir_builder.py @@ -52,7 +52,7 @@ from . import ast from ..context import get_context, get_location -from ..utils import hcl_dtype_to_mlir, get_extra_type_hints +from ..utils import hcl_dtype_to_mlir, get_extra_type_hints, make_anywidth_numpy_array from .. import types as htypes from . import build_cleaner @@ -1410,61 +1410,9 @@ def build_constant_tensor_op(self, op: ast.ConstantTensorOp, ip): dtype = hcl_dtype_to_mlir(op.dtype, signless=True) shape = op.values.shape if isinstance(op.dtype, (htypes.Int, htypes.UInt)): - # The following code has several steps to convert the numpy array to have - # the correct data type in order to create an MLIR constant tensor. - # Since MLIR-NumPy Python interface only supports byte-addressable data types, - # we need to change the data type of the array to have the minimum number of bytes - # that can represent the target bitwidth. - # e.g., hcl.const_tensor(arr, dtype=hcl.Int(20)) (6*6 array) - # which requires 20 bits (3 bytes) to represent each element - # declaration: 6*6*i20 - # numpy input: 6*6*i64 - # 1. Decompose the original i32 or i64 array into a structured array of uint8 - # -> decompose: 6*6*8*i8 - if op.dtype.bits == 1: - val = op.values - array = np.packbits(val, axis=None, bitorder="little") - value_attr = DenseElementsAttr.get(array, shape=val.shape, type=dtype) - else: - # Here we construct a customized NumPy dtype, "f0", "f1", "f2", etc. - # are the field names, and the entire data type is `op.values.dtype`. - # This can be viewed as a `union` type in C/C++. - # Please refer to the documentation for more details: - # https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types - decomposed_np_dtype = np.dtype( - ( - op.values.dtype, - { - f"f{i}": (np.uint8, i) - for i in range(op.values.dtype.itemsize) - }, - ) - ) - val = op.values.view(decomposed_np_dtype) - # 2. Compose the uint8 array into a structured array of target bitwidth - # This is done by taking the first several bytes of the uint8 array - # "u1" means one unsigned byte, and "i1" means one signed byte - n_bytes = int(np.ceil(dtype.width / 8)) - new_dtype = np.dtype( - { - "names": [f"f{i}" for i in range(n_bytes)], - "formats": (["i1"] if isinstance(dtype, htypes.Int) else ["u1"]) - + ["u1"] * (n_bytes - 1), - "offsets": list(range(n_bytes)), - "itemize": n_bytes, - } - ) - # -> compose: 6*6*3*i8 - val = np.stack([val[f"f{i}"] for i in range(n_bytes)], axis=-1) - # -> flatten: 108*i8 - val = val.flatten() - # -> view: 36*i24 - val = val.view(np.dtype(new_dtype)) - # -> reshape: 6*6*i24 - val = val.reshape(shape) - # Pass in the numpy array to get the MLIR attribute - # -> result: 6*6*i20 - value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype) + signed = isinstance(op.dtype, htypes.Int) + val = make_anywidth_numpy_array(op.values, op.dtype.bits, signed) + value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype) else: val = op.values value_attr = DenseElementsAttr.get(val) diff --git a/heterocl/utils.py b/heterocl/utils.py index 68116a74..1d416529 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -219,3 +219,75 @@ def get_max_value(dtype): if isinstance(dtype, UFixed): return (1 << dtype.bits) - 1 raise DTypeError(f"Unrecognized data type: {dtype}") + + +def make_anywidth_numpy_array(val, bitwidth, signed): + """ + Converts a numpy array to any target bitwidth. + ---------------- + Parameters: + val: numpy.ndarray + numpy array, can be any numpy native bitwidth, e.g. np.int64 + bitwidth: int + target bitwidth e.g. 9, 31, 198 + signed: True or False + whether the values in the array are signed or unsigned + ---------------- + Returns: + numpy.ndarray + numpy array with the target bitwidth + """ + shape = val.shape + # The following code has several steps to convert the numpy array to have + # the correct data type in order to create an MLIR constant tensor. + # Since MLIR-NumPy Python interface only supports byte-addressable data types, + # we need to change the data type of the array to have the minimum number of bytes + # that can represent the target bitwidth. + # e.g., hcl.const_tensor(arr, dtype=hcl.Int(20)) (6*6 array) + # which requires 20 bits (3 bytes) to represent each element + # declaration: 6*6*i20 + # numpy input: 6*6*i64 + # 1. Decompose the original i32 or i64 array into a structured array of uint8 + # -> decompose: 6*6*8*i8 + if bitwidth == 1: + return np.packbits(val, axis=None, bitorder="little") + else: + # Here we construct a customized NumPy dtype, "f0", "f1", "f2", etc. + # are the field names, and the entire data type is `op.values.dtype`. + # This can be viewed as a `union` type in C/C++. + # Please refer to the documentation for more details: + # https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types + decomposed_np_dtype = np.dtype( + ( + val.dtype, + { + f"f{i}": (np.uint8, i) + for i in range(val.dtype.itemsize) + }, + ) + ) + val = val.view(decomposed_np_dtype) + # 2. Compose the uint8 array into a structured array of target bitwidth + # This is done by taking the first several bytes of the uint8 array + # "u1" means one unsigned byte, and "i1" means one signed byte + n_bytes = int(np.ceil(bitwidth / 8)) + new_dtype = np.dtype( + { + "names": [f"f{i}" for i in range(n_bytes)], + "formats": (["i1"] if signed else ["u1"]) + + ["u1"] * (n_bytes - 1), + "offsets": list(range(n_bytes)), + "itemize": n_bytes, + } + ) + # -> compose: 6*6*3*i8 + val = np.stack([val[f"f{i}"] for i in range(n_bytes)], axis=-1) + # -> flatten: 108*i8 + val = val.flatten() + # -> view: 36*i24 + val = val.view(np.dtype(new_dtype)) + # -> reshape: 6*6*i24 + val = val.reshape(shape) + # Pass in the numpy array to get the MLIR attribute + # -> result: 6*6*i20 + return val \ No newline at end of file From 07e430c41116e894ce0da5baa49b8165a392f019 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Tue, 7 Mar 2023 15:22:56 -0500 Subject: [PATCH 02/15] [IRBuilder] Fix shape issue with DenseElementsAttr creation --- heterocl/ast/ir_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heterocl/ast/ir_builder.py b/heterocl/ast/ir_builder.py index b43e24c5..ffa820e7 100644 --- a/heterocl/ast/ir_builder.py +++ b/heterocl/ast/ir_builder.py @@ -1412,7 +1412,7 @@ def build_constant_tensor_op(self, op: ast.ConstantTensorOp, ip): if isinstance(op.dtype, (htypes.Int, htypes.UInt)): signed = isinstance(op.dtype, htypes.Int) val = make_anywidth_numpy_array(op.values, op.dtype.bits, signed) - value_attr = DenseElementsAttr.get(val, shape=val.shape, type=dtype) + value_attr = DenseElementsAttr.get(val, shape=op.values.shape, type=dtype) else: val = op.values value_attr = DenseElementsAttr.get(val) From c5fee64344a65fca34e0e0241362386bc76dc882 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Tue, 7 Mar 2023 16:07:48 -0500 Subject: [PATCH 03/15] Reconstructing LLVM backend runtime --- heterocl/build_module.py | 3 +-- heterocl/runtime.py | 38 ++++++++++++++++++++++++++++++++++---- heterocl/tensor.py | 5 +++++ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/heterocl/build_module.py b/heterocl/build_module.py index 8a66bd68..91124927 100644 --- a/heterocl/build_module.py +++ b/heterocl/build_module.py @@ -337,13 +337,12 @@ def attach_llvm_attrs(module): hcl_d.lower_composite_type(module) hcl_d.lower_fixed_to_int(module) hcl_d.lower_print_ops(module) - hcl_d.lower_anywidth_int(module) + # hcl_d.lower_anywidth_int(module) # Note: lower_any_width_int should precede # move_return_to_input, because it uses input/output # type hints. hcl_d.move_return_to_input(module) hcl_d.lower_bit_ops(module) - # print(module) hcl_d.legalize_cast(module) hcl_d.remove_stride_map(module) pipeline = "lower-affine,func.func(buffer-loop-hoisting)" diff --git a/heterocl/runtime.py b/heterocl/runtime.py index f4ef694e..ea2c4ff4 100644 --- a/heterocl/runtime.py +++ b/heterocl/runtime.py @@ -111,7 +111,7 @@ def execute_fpga_backend(target, shell=True): raise RuntimeError("Not implemented") -def execute_llvm_backend(execution_engine, name, return_num, *argv): +def execute_llvm_backend_obsolete(execution_engine, name, return_num, *argv): """ - execution_engine: mlir.ExecutionEngine object, created in hcl.build - name: str, device top-level function name @@ -137,6 +137,36 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): # Invoke device top-level function execution_engine.invoke(name, *return_pointers, *arg_pointers) # Copy output arrays back - for i, return_p in enumerate(return_pointers): - out_array = rt.ranked_memref_to_numpy(return_p[0]) - np.copyto(argv[-(len(return_args) - i)].np_array, out_array) + # might be unnecessary + # for i, return_p in enumerate(return_pointers): + # out_array = rt.ranked_memref_to_numpy(return_p[0]) + # np.copyto(argv[-(len(return_args) - i)].np_array, out_array) # problem here + +def execute_llvm_backend(execution_engine, name, return_num, *argv): + """ + Execute LLVM backend. Assume all return args have been moved to + input args. + ---------- + execution_engine: mlir.ExecutionEngine + JIT object, created in hcl.build + name: str + device top-level function name + argv: list-like object + a list of input and output variables + """ + # TODO: remove return_num + if not isinstance(argv, list): + argv = list(argv) + + # Unwrap hcl Array to get numpy arrays + argv_np = [arg.unwrap() for arg in argv] + arg_pointers = [] + for arg in argv_np: + memref = rt.get_ranked_memref_descriptor(arg) + arg_pointers.append(ctypes.pointer(ctypes.pointer(memref))) + # Invoke device top-level function + execution_engine.invoke(name, *arg_pointers) + # this part is still necessary + # comment out for now + # for i, arg_p in enumerate(arg_pointers): + # out_array = rt.ranked_memref_to_numpy(arg_p[0]) \ No newline at end of file diff --git a/heterocl/tensor.py b/heterocl/tensor.py index e161ea43..6efe8a5a 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -5,6 +5,7 @@ from hcl_mlir.exceptions import DTypeError from .types import dtype_to_str, Int, UInt, Float, Fixed, UFixed +from .utils import make_anywidth_numpy_array class Array: @@ -81,6 +82,10 @@ def asnumpy(self): return self.np_array def unwrap(self): + if isinstance(self.dtype, (Int, Fixed)): + return make_anywidth_numpy_array(self.np_array, self.dtype.bits, True) + elif isinstance(self.dtype, (UInt, UFixed)): + return make_anywidth_numpy_array(self.np_array, self.dtype.bits, False) return self.np_array def __repr__(self) -> str: From 7dcf4df91aeca3ac44502b6489a5b53c1f7dd6ab Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Mon, 13 Mar 2023 15:48:23 -0400 Subject: [PATCH 04/15] [Util] Remove np.int128, np.int256, since they don't exist --- heterocl/utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/heterocl/utils.py b/heterocl/utils.py index 1d416529..be518479 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -143,10 +143,6 @@ def make_const_tensor(val, dtype): np_dtype = np.int32 elif dtype.bits <= 64: np_dtype = np.int64 - elif dtype.bits <= 128: - np_dtype = np.int128 - elif dtype.bits <= 256: - np_dtype = np.int256 else: raise DTypeError( f"Integer width ({dtype}) too large, not supported by numpy" @@ -277,7 +273,7 @@ def make_anywidth_numpy_array(val, bitwidth, signed): "formats": (["i1"] if signed else ["u1"]) + ["u1"] * (n_bytes - 1), "offsets": list(range(n_bytes)), - "itemize": n_bytes, + "itemize": n_bytes, # should this be itemsize? } ) # -> compose: 6*6*3*i8 From fddd444fbdcd377ad900a8f0ae53b679f93863aa Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 04:11:28 -0400 Subject: [PATCH 05/15] [Array] Extend hcl.array to support any bitwidth --- heterocl/runtime.py | 23 ++++- heterocl/tensor.py | 204 +++++++++++++++++++++++++++++++------------- heterocl/utils.py | 4 +- 3 files changed, 167 insertions(+), 64 deletions(-) diff --git a/heterocl/runtime.py b/heterocl/runtime.py index ea2c4ff4..e4d7a0c6 100644 --- a/heterocl/runtime.py +++ b/heterocl/runtime.py @@ -12,6 +12,12 @@ from hcl_mlir import runtime as rt from .report import parse_xml +# Filter out the warning from numpy when using ctypes array as numpy array. +# This is a Python bug, see: +# https://stackoverflow.com/questions/4964101/pep-3118-warning-when-using-ctypes-array-as-numpy-array +import warnings +warnings.filterwarnings("ignore", category=RuntimeWarning, message="A builtin ctypes object gave a PEP3118 format string that does not match its itemsize*") + def run_process(cmd, pattern=None): p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) @@ -168,5 +174,18 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): execution_engine.invoke(name, *arg_pointers) # this part is still necessary # comment out for now - # for i, arg_p in enumerate(arg_pointers): - # out_array = rt.ranked_memref_to_numpy(arg_p[0]) \ No newline at end of file + # print(arg_pointers[0][0][0].aligned) + # print(f"is ctypes._Pointer: {isinstance(arg_pointers[0][0][0].aligned, ctypes._Pointer)}") + # print(arg_pointers[1][0][0].aligned) + # print(f"is ctypes._Pointer: {isinstance(arg_pointers[1][0][0].aligned, ctypes._Pointer)}") + for i, arg_p in enumerate(arg_pointers): + np_arr = np.ctypeslib.as_array( + arg_p[0][0].aligned, shape=arg_p[0][0].shape) + strided_arr = np.lib.stride_tricks.as_strided( + np_arr, + np.ctypeslib.as_array(arg_p[0][0].shape), + np.ctypeslib.as_array(arg_p[0][0].strides) * np_arr.itemsize, + ) + out_array = strided_arr + # out_array = rt.ranked_memref_to_numpy(arg_p[0]) # can confirm that it works with any bitwidth array + np.copyto(argv[i].np_array, out_array) # target, source \ No newline at end of file diff --git a/heterocl/tensor.py b/heterocl/tensor.py index 6efe8a5a..b326932b 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -2,70 +2,69 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -from hcl_mlir.exceptions import DTypeError +import math +from hcl_mlir.exceptions import DTypeError, APIError, DTypeWarning from .types import dtype_to_str, Int, UInt, Float, Fixed, UFixed from .utils import make_anywidth_numpy_array class Array: - """A wrapper class for numpy array - Differences between array and tensor: - tensor is only a placeholder while array holds actual values + """ + Represents a input tensor in HeteroCL. + This class is a wrapper of numpy.ndarray, but it also + support a wider range of data types, including any-width + integer and fixed-point data types. """ - def __init__(self, np_array, dtype): - self.dtype = dtype # should specify the type of `dtype` - if isinstance(np_array, list): - np_array = np.array(np_array) - if dtype is not None: - # Data type check - if isinstance(dtype, Float): - hcl_dtype_str = dtype_to_str(dtype) - correct_dtype = np.dtype(hcl_dtype_str) - if np_array.dtype != correct_dtype: - np_array = np_array.astype(correct_dtype) - elif isinstance(dtype, Int): - # Handle overflow - sb = 1 << self.dtype.bits - sb_limit = 1 << (self.dtype.bits - 1) - np_array = np_array % sb - - def cast_func(x): - return x if x < sb_limit else x - sb - - vec_np_array = np.vectorize(cast_func)(np_array) - np_array = vec_np_array.astype(np.uint64) - elif isinstance(dtype, UInt): - # Handle overflow - sb = 1 << self.dtype.bits - np_array = np_array % sb - np_array = np_array.astype(np.uint64) - elif isinstance(dtype, Fixed): - # Handle overflow - sb = 1 << self.dtype.bits - sb_limit = 1 << (self.dtype.bits - 1) - np_array = np_array * (2**dtype.fracs) - np_array = np.fix(np_array) % sb - - def cast_func(x): - return x if x < sb_limit else x - sb - - vec_np_array = np.vectorize(cast_func)(np_array) - np_array = vec_np_array.astype(np.uint64) - elif isinstance(dtype, UFixed): - # Handle overflow - sb = 1 << self.dtype.bits - np_array = np_array * (2**dtype.fracs) - np_array = np.fix(np_array) % sb - np_array = np_array.astype(np.uint64) - else: - raise DTypeError("Type error: unrecognized type: " + str(self.dtype)) - else: - raise RuntimeError("Should provide type info") - self.np_array = np_array - + def __init__(self, array, dtype): + """ + Parameters + ---------- + array : numpy.ndarray or a python list + The array to be wrapped. + If the bitwidth of the data type is wider than 64, + the array should be a python list. + dtype : HeteroCL data type + """ + self.dtype = dtype + if dtype is None: + raise APIError("Should provide type info") + # self.np_array: a numpy array that holds the data + # For float type, self.np_array is a float type numpy array + # For int, uint, fixed, ufixed, self.np_array is a struct type numpy array + # with each field being a byte. + self.np_array = self._handle_overflow(array, dtype) + if not isinstance(dtype, Float): + signed = isinstance(dtype, Int) or isinstance(dtype, Fixed) + # closest power of 2 + bitwidth = 1 << (self.dtype.bits - 1).bit_length() + if bitwidth < 8: bitwidth = 8 + # this is to be compliant with MLIR's anywidth type representation + # e.g. i1-i8 -> int8 + # i9-i16 -> int16 + # i17-i32 -> int32 + # i33-i64 -> int64 + # i65-i128 -> int128 + # i129-i256 -> int256 + self.np_array = make_anywidth_numpy_array(self.np_array, bitwidth, signed) + def asnumpy(self): + """ + Convert HeteroCL array to numpy array / python list. + If the bitwidth is wider than 64, the result will be a python list. + Otherwise, return a numpy array. + """ + if isinstance(self.dtype, Float): + hcl_dtype_str = dtype_to_str(self.dtype) + np_dtype = np.dtype(hcl_dtype_str) + res_array = self.np_array.astype(np_dtype) + return res_array + elif isinstance(self.dtype, Int): + if self.dtype.bits > 64: + DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + return self._struct_np_array_to_int() + if isinstance(self.dtype, (Fixed, UFixed)): if isinstance(self.dtype, Fixed): res_array = self.np_array.astype(np.int64) @@ -82,11 +81,98 @@ def asnumpy(self): return self.np_array def unwrap(self): - if isinstance(self.dtype, (Int, Fixed)): - return make_anywidth_numpy_array(self.np_array, self.dtype.bits, True) - elif isinstance(self.dtype, (UInt, UFixed)): - return make_anywidth_numpy_array(self.np_array, self.dtype.bits, False) return self.np_array + + def _handle_overflow(self, array, dtype): + """ + If the dtype is wider than 64 bits, + array should a list of numpy numbers. + """ + # Data type check + if isinstance(dtype, Float): + if isinstance(array, list): + array = np.array(array) + hcl_dtype_str = dtype_to_str(dtype) + correct_dtype = np.dtype(hcl_dtype_str) + if array.dtype != correct_dtype: + array = array.astype(correct_dtype) + elif isinstance(dtype, Int): + sb = 1 << self.dtype.bits + sb_limit = 1 << (self.dtype.bits - 1) + array = array % sb # cap the value to the max value of the bitwidth + def cast_func(x): + # recursive + if isinstance(x, list): + return [cast_func(y) for y in x] + # signed integer overflow function: wrap mode + return x if x < sb_limit else x - sb + if isinstance(array, list): + array = [cast_func(x) for x in array] # TODO: this should be tested independently + else: + array = np.vectorize(cast_func)(array) + elif isinstance(dtype, UInt): + # Handle overflow + sb = 1 << self.dtype.bits + array = array % sb + elif isinstance(dtype, Fixed): + # Handle overflow + sb = 1 << self.dtype.bits + sb_limit = 1 << (self.dtype.bits - 1) + array = array * (2**dtype.fracs) + def cast_func(x): + # recursive + if isinstance(x, list): + return [cast_func(y) for y in x] + x = math.trunc(x) % sb # rounds towards zero + # signed integer overflow function: wrap mode + return x if x < sb_limit else x - sb + if isinstance(array, list): + array = [cast_func(x) for x in array] + else: + array = np.vectorize(cast_func)(array) + elif isinstance(dtype, UFixed): + # Handle overflow + sb = 1 << self.dtype.bits + array = array * (2**dtype.fracs) + def cast_func(x): + # recursive + if isinstance(x, list): + return [cast_func(y) for y in x] + x = math.trunc(x) % sb # rounds towards zero + return x + if isinstance(array, list): + array = [cast_func(x) for x in array] + else: + array = np.vectorize(cast_func)(array) + else: + raise DTypeError("Type error: unrecognized type: " + str(self.dtype)) + return array + + + def _struct_np_array_to_int(self): + pylist = self.np_array.tolist() + # each element is a tuple + def to_int(x): + if isinstance(x, list): + return [to_int(y) for y in x] + # concatenate the tuple + # each element is a byte + signed = isinstance(self.dtype, (Int, Fixed)) + byte_str = b'' + byte_str += x[0].to_bytes(1, byteorder='little', signed=signed) + for i in range(1, len(x)): + byte_str += x[i].to_bytes(1, byteorder='little', signed=False) + value = int.from_bytes(byte_str, byteorder='little', signed=signed) + # handle signed negative int: equivalent to sign extension + if signed and value >= (1 << (self.dtype.bits - 1)): + value -= (1 << self.dtype.bits) + return value + pylist = to_int(pylist) + if self.dtype.bits <= 64: + return np.array(pylist, dtype=np.int64) + else: + return pylist + def __repr__(self) -> str: return self.asnumpy().__repr__() diff --git a/heterocl/utils.py b/heterocl/utils.py index be518479..90de9d34 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -273,7 +273,7 @@ def make_anywidth_numpy_array(val, bitwidth, signed): "formats": (["i1"] if signed else ["u1"]) + ["u1"] * (n_bytes - 1), "offsets": list(range(n_bytes)), - "itemize": n_bytes, # should this be itemsize? + "itemsize": n_bytes, # should this be itemsize? } ) # -> compose: 6*6*3*i8 @@ -284,6 +284,4 @@ def make_anywidth_numpy_array(val, bitwidth, signed): val = val.view(np.dtype(new_dtype)) # -> reshape: 6*6*i24 val = val.reshape(shape) - # Pass in the numpy array to get the MLIR attribute - # -> result: 6*6*i20 return val \ No newline at end of file From 172772b84426c6c6c4f1b0cd36f3da044d08d809 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 11:25:23 -0400 Subject: [PATCH 06/15] [Array] Add sign extension --- heterocl/runtime.py | 16 +------------ heterocl/tensor.py | 55 +++++++++++++++++++++++++++------------------ heterocl/utils.py | 16 +++++++++---- 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/heterocl/runtime.py b/heterocl/runtime.py index e4d7a0c6..c7901254 100644 --- a/heterocl/runtime.py +++ b/heterocl/runtime.py @@ -172,20 +172,6 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): arg_pointers.append(ctypes.pointer(ctypes.pointer(memref))) # Invoke device top-level function execution_engine.invoke(name, *arg_pointers) - # this part is still necessary - # comment out for now - # print(arg_pointers[0][0][0].aligned) - # print(f"is ctypes._Pointer: {isinstance(arg_pointers[0][0][0].aligned, ctypes._Pointer)}") - # print(arg_pointers[1][0][0].aligned) - # print(f"is ctypes._Pointer: {isinstance(arg_pointers[1][0][0].aligned, ctypes._Pointer)}") for i, arg_p in enumerate(arg_pointers): - np_arr = np.ctypeslib.as_array( - arg_p[0][0].aligned, shape=arg_p[0][0].shape) - strided_arr = np.lib.stride_tricks.as_strided( - np_arr, - np.ctypeslib.as_array(arg_p[0][0].shape), - np.ctypeslib.as_array(arg_p[0][0].strides) * np_arr.itemsize, - ) - out_array = strided_arr - # out_array = rt.ranked_memref_to_numpy(arg_p[0]) # can confirm that it works with any bitwidth array + out_array = rt.ranked_memref_to_numpy(arg_p[0]) np.copyto(argv[i].np_array, out_array) # target, source \ No newline at end of file diff --git a/heterocl/tensor.py b/heterocl/tensor.py index b326932b..56d01ca4 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -64,21 +64,22 @@ def asnumpy(self): if self.dtype.bits > 64: DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") return self._struct_np_array_to_int() - - if isinstance(self.dtype, (Fixed, UFixed)): - if isinstance(self.dtype, Fixed): - res_array = self.np_array.astype(np.int64) - else: - res_array = self.np_array - res_array = res_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) - return res_array - if isinstance(self.dtype, Int): - res_array = self.np_array.astype(np.int64) - return res_array - if isinstance(self.dtype, Float): - res_array = self.np_array.astype(float) - return res_array - return self.np_array + elif isinstance(self.dtype, UInt): + if self.dtype.bits > 64: + DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + return self._struct_np_array_to_int() + elif isinstance(self.dtype, Fixed): + if self.dtype.bits > 64: + DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + base_array = self._struct_np_array_to_int() + return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) + elif isinstance(self.dtype, UFixed): + if self.dtype.bits > 64: + DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + base_array = self._struct_np_array_to_int() + return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) + else: + raise DTypeError(f"Unsupported data type {self.dtype}") def unwrap(self): return self.np_array @@ -100,12 +101,12 @@ def _handle_overflow(self, array, dtype): elif isinstance(dtype, Int): sb = 1 << self.dtype.bits sb_limit = 1 << (self.dtype.bits - 1) - array = array % sb # cap the value to the max value of the bitwidth def cast_func(x): # recursive if isinstance(x, list): return [cast_func(y) for y in x] # signed integer overflow function: wrap mode + x = x % sb # cap the value to the max value of the bitwidth return x if x < sb_limit else x - sb if isinstance(array, list): array = [cast_func(x) for x in array] # TODO: this should be tested independently @@ -156,17 +157,27 @@ def _struct_np_array_to_int(self): def to_int(x): if isinstance(x, list): return [to_int(y) for y in x] + signed = isinstance(self.dtype, (Int, Fixed)) + # turn x from tuple to list + x = list(x) + # find MSB + byte_idx = (self.dtype.bits - 1) // 8 + bit_idx = (self.dtype.bits - 1) % 8 + msb = (x[byte_idx] & (1 << bit_idx)) > 0 + # sign extension + if signed and msb: + x[byte_idx] |= ((0xff << bit_idx) & 0xff) + for i in range(byte_idx + 1, len(x)): + x[i] = 0xff # concatenate the tuple # each element is a byte - signed = isinstance(self.dtype, (Int, Fixed)) byte_str = b'' - byte_str += x[0].to_bytes(1, byteorder='little', signed=signed) - for i in range(1, len(x)): + for i in range(len(x) - 1): + # little endian, first x-1 elements are unsigned bytes byte_str += x[i].to_bytes(1, byteorder='little', signed=False) + # last element is signed + byte_str += x[-1].to_bytes(1, byteorder='little', signed=signed) value = int.from_bytes(byte_str, byteorder='little', signed=signed) - # handle signed negative int: equivalent to sign extension - if signed and value >= (1 << (self.dtype.bits - 1)): - value -= (1 << self.dtype.bits) return value pylist = to_int(pylist) if self.dtype.bits <= 64: diff --git a/heterocl/utils.py b/heterocl/utils.py index 90de9d34..8675d54d 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -234,6 +234,8 @@ def make_anywidth_numpy_array(val, bitwidth, signed): numpy array with the target bitwidth """ shape = val.shape + sign_array = val > 0 + avail_bytes = val.itemsize # number of bytes of each element # The following code has several steps to convert the numpy array to have # the correct data type in order to create an MLIR constant tensor. # Since MLIR-NumPy Python interface only supports byte-addressable data types, @@ -266,18 +268,24 @@ def make_anywidth_numpy_array(val, bitwidth, signed): # 2. Compose the uint8 array into a structured array of target bitwidth # This is done by taking the first several bytes of the uint8 array # "u1" means one unsigned byte, and "i1" means one signed byte + # f0 is LSB, fn is MSB n_bytes = int(np.ceil(bitwidth / 8)) new_dtype = np.dtype( { "names": [f"f{i}" for i in range(n_bytes)], - "formats": (["i1"] if signed else ["u1"]) - + ["u1"] * (n_bytes - 1), + "formats": ["u1"] * (n_bytes - 1) + (["i1"] if signed else ["u1"]), "offsets": list(range(n_bytes)), - "itemsize": n_bytes, # should this be itemsize? + "itemsize": n_bytes, } ) + # sometimes the available bytes are not enough to represent the target bitwidth + # so that we need to pad the array + _bytes = [val[f"f{i}"] for i in range(min(avail_bytes, n_bytes))] + if avail_bytes < n_bytes: + padding = np.where(sign_array, 0x00, 0xFF).astype(np.uint8) + _bytes += [padding] * (n_bytes - avail_bytes) # -> compose: 6*6*3*i8 - val = np.stack([val[f"f{i}"] for i in range(n_bytes)], axis=-1) + val = np.stack(_bytes, axis=-1) # -> flatten: 108*i8 val = val.flatten() # -> view: 36*i24 From 9cd8bf8e21442e113a9e7b6046828121c801f5cc Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 14:06:27 -0400 Subject: [PATCH 07/15] [Runtime] copying back results is not necessary --- heterocl/runtime.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/heterocl/runtime.py b/heterocl/runtime.py index c7901254..8a240da7 100644 --- a/heterocl/runtime.py +++ b/heterocl/runtime.py @@ -172,6 +172,11 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): arg_pointers.append(ctypes.pointer(ctypes.pointer(memref))) # Invoke device top-level function execution_engine.invoke(name, *arg_pointers) - for i, arg_p in enumerate(arg_pointers): - out_array = rt.ranked_memref_to_numpy(arg_p[0]) - np.copyto(argv[i].np_array, out_array) # target, source \ No newline at end of file + # for i, arg_p in enumerate(arg_pointers): + # out_array = rt.ranked_memref_to_numpy(arg_p[0]) + # if out_array element type has one byte, + # ranked_memref_to_numpy will automatically unpack it + # if argv[i].np_array.dtype.itemsize == 1: + # np.copyto(argv[i].np_array['f0'], out_array) + # else: + # np.copyto(argv[i].np_array, out_array) # target, source \ No newline at end of file From abea8458839f36c14e963462986eec4521341a31 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 14:06:53 -0400 Subject: [PATCH 08/15] [Array] Exclude changes in fixed type in this PR, since it needs changes in IR first --- heterocl/tensor.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/heterocl/tensor.py b/heterocl/tensor.py index 56d01ca4..e50fecb6 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -35,8 +35,8 @@ def __init__(self, array, dtype): # For int, uint, fixed, ufixed, self.np_array is a struct type numpy array # with each field being a byte. self.np_array = self._handle_overflow(array, dtype) - if not isinstance(dtype, Float): - signed = isinstance(dtype, Int) or isinstance(dtype, Fixed) + if isinstance(dtype, (Int, UInt)): + signed = isinstance(dtype, Int) # closest power of 2 bitwidth = 1 << (self.dtype.bits - 1).bit_length() if bitwidth < 8: bitwidth = 8 @@ -68,16 +68,20 @@ def asnumpy(self): if self.dtype.bits > 64: DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") return self._struct_np_array_to_int() + #TODO(Niansong): fixed/ufixed does not go through struct_np_array_to_int for now + # because a change in IR is needed to support this, leaving it to another PR elif isinstance(self.dtype, Fixed): if self.dtype.bits > 64: DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") - base_array = self._struct_np_array_to_int() - return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) + # base_array = self._struct_np_array_to_int() + # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) + return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) elif isinstance(self.dtype, UFixed): if self.dtype.bits > 64: DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") - base_array = self._struct_np_array_to_int() - return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) + # base_array = self._struct_np_array_to_int() + # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) + return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) else: raise DTypeError(f"Unsupported data type {self.dtype}") From 340ccecb9c645f0e67ec7013b0b67e8626d6b59b Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 14:24:56 -0400 Subject: [PATCH 09/15] [Util] Remove signedness in struct numpy representation, to make sign extension easier --- heterocl/tensor.py | 5 +---- heterocl/utils.py | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/heterocl/tensor.py b/heterocl/tensor.py index e50fecb6..9930fc19 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -176,11 +176,8 @@ def to_int(x): # concatenate the tuple # each element is a byte byte_str = b'' - for i in range(len(x) - 1): - # little endian, first x-1 elements are unsigned bytes + for i in range(len(x)): byte_str += x[i].to_bytes(1, byteorder='little', signed=False) - # last element is signed - byte_str += x[-1].to_bytes(1, byteorder='little', signed=signed) value = int.from_bytes(byte_str, byteorder='little', signed=signed) return value pylist = to_int(pylist) diff --git a/heterocl/utils.py b/heterocl/utils.py index 8675d54d..a6560054 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -273,7 +273,8 @@ def make_anywidth_numpy_array(val, bitwidth, signed): new_dtype = np.dtype( { "names": [f"f{i}" for i in range(n_bytes)], - "formats": ["u1"] * (n_bytes - 1) + (["i1"] if signed else ["u1"]), + # "formats": ["u1"] * (n_bytes - 1) + (["i1"] if signed else ["u1"]), + "formats": ["u1"] * n_bytes, "offsets": list(range(n_bytes)), "itemsize": n_bytes, } From 9a28e52ecbafce34c22b9e8a9867b97d04f848ca Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 14:31:46 -0400 Subject: [PATCH 10/15] [Array] Fix issue with fixed type overflow handling --- heterocl/tensor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/heterocl/tensor.py b/heterocl/tensor.py index 9930fc19..3ab1d3ce 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -124,6 +124,7 @@ def cast_func(x): # Handle overflow sb = 1 << self.dtype.bits sb_limit = 1 << (self.dtype.bits - 1) + array = array.astype(np.float64) array = array * (2**dtype.fracs) def cast_func(x): # recursive @@ -136,9 +137,11 @@ def cast_func(x): array = [cast_func(x) for x in array] else: array = np.vectorize(cast_func)(array) + array = array.astype(np.int64) elif isinstance(dtype, UFixed): # Handle overflow sb = 1 << self.dtype.bits + array = array.astype(np.float64) array = array * (2**dtype.fracs) def cast_func(x): # recursive @@ -150,6 +153,7 @@ def cast_func(x): array = [cast_func(x) for x in array] else: array = np.vectorize(cast_func)(array) + array = array.astype(np.int64) else: raise DTypeError("Type error: unrecognized type: " + str(self.dtype)) return array From e930edbde3a43ff12a3d6a4d87e15119c513fd43 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 15:11:25 -0400 Subject: [PATCH 11/15] Format with black --- heterocl/runtime.py | 34 +++++++++++++++++------------ heterocl/tensor.py | 53 ++++++++++++++++++++++++++++++--------------- heterocl/utils.py | 9 +++----- 3 files changed, 58 insertions(+), 38 deletions(-) diff --git a/heterocl/runtime.py b/heterocl/runtime.py index 8a240da7..341fb658 100644 --- a/heterocl/runtime.py +++ b/heterocl/runtime.py @@ -16,7 +16,12 @@ # This is a Python bug, see: # https://stackoverflow.com/questions/4964101/pep-3118-warning-when-using-ctypes-array-as-numpy-array import warnings -warnings.filterwarnings("ignore", category=RuntimeWarning, message="A builtin ctypes object gave a PEP3118 format string that does not match its itemsize*") + +warnings.filterwarnings( + "ignore", + category=RuntimeWarning, + message="A builtin ctypes object gave a PEP3118 format string that does not match its itemsize*", +) def run_process(cmd, pattern=None): @@ -145,15 +150,16 @@ def execute_llvm_backend_obsolete(execution_engine, name, return_num, *argv): # Copy output arrays back # might be unnecessary # for i, return_p in enumerate(return_pointers): - # out_array = rt.ranked_memref_to_numpy(return_p[0]) - # np.copyto(argv[-(len(return_args) - i)].np_array, out_array) # problem here + # out_array = rt.ranked_memref_to_numpy(return_p[0]) + # np.copyto(argv[-(len(return_args) - i)].np_array, out_array) # problem here + def execute_llvm_backend(execution_engine, name, return_num, *argv): """ - Execute LLVM backend. Assume all return args have been moved to - input args. + Execute LLVM backend. Assume all return args have been moved to + input args. ---------- - execution_engine: mlir.ExecutionEngine + execution_engine: mlir.ExecutionEngine JIT object, created in hcl.build name: str device top-level function name @@ -163,7 +169,7 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): # TODO: remove return_num if not isinstance(argv, list): argv = list(argv) - + # Unwrap hcl Array to get numpy arrays argv_np = [arg.unwrap() for arg in argv] arg_pointers = [] @@ -173,10 +179,10 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): # Invoke device top-level function execution_engine.invoke(name, *arg_pointers) # for i, arg_p in enumerate(arg_pointers): - # out_array = rt.ranked_memref_to_numpy(arg_p[0]) - # if out_array element type has one byte, - # ranked_memref_to_numpy will automatically unpack it - # if argv[i].np_array.dtype.itemsize == 1: - # np.copyto(argv[i].np_array['f0'], out_array) - # else: - # np.copyto(argv[i].np_array, out_array) # target, source \ No newline at end of file + # out_array = rt.ranked_memref_to_numpy(arg_p[0]) + # if out_array element type has one byte, + # ranked_memref_to_numpy will automatically unpack it + # if argv[i].np_array.dtype.itemsize == 1: + # np.copyto(argv[i].np_array['f0'], out_array) + # else: + # np.copyto(argv[i].np_array, out_array) # target, source diff --git a/heterocl/tensor.py b/heterocl/tensor.py index 3ab1d3ce..00cbdfbd 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -39,7 +39,8 @@ def __init__(self, array, dtype): signed = isinstance(dtype, Int) # closest power of 2 bitwidth = 1 << (self.dtype.bits - 1).bit_length() - if bitwidth < 8: bitwidth = 8 + if bitwidth < 8: + bitwidth = 8 # this is to be compliant with MLIR's anywidth type representation # e.g. i1-i8 -> int8 # i9-i16 -> int16 @@ -48,7 +49,7 @@ def __init__(self, array, dtype): # i65-i128 -> int128 # i129-i256 -> int256 self.np_array = make_anywidth_numpy_array(self.np_array, bitwidth, signed) - + def asnumpy(self): """ Convert HeteroCL array to numpy array / python list. @@ -62,23 +63,31 @@ def asnumpy(self): return res_array elif isinstance(self.dtype, Int): if self.dtype.bits > 64: - DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + DTypeWarning( + f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" + ) return self._struct_np_array_to_int() elif isinstance(self.dtype, UInt): if self.dtype.bits > 64: - DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + DTypeWarning( + f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" + ) return self._struct_np_array_to_int() - #TODO(Niansong): fixed/ufixed does not go through struct_np_array_to_int for now + # TODO(Niansong): fixed/ufixed does not go through struct_np_array_to_int for now # because a change in IR is needed to support this, leaving it to another PR elif isinstance(self.dtype, Fixed): if self.dtype.bits > 64: - DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + DTypeWarning( + f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" + ) # base_array = self._struct_np_array_to_int() # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) elif isinstance(self.dtype, UFixed): if self.dtype.bits > 64: - DTypeWarning(f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list") + DTypeWarning( + f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" + ) # base_array = self._struct_np_array_to_int() # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) @@ -88,7 +97,6 @@ def asnumpy(self): def unwrap(self): return self.np_array - def _handle_overflow(self, array, dtype): """ If the dtype is wider than 64 bits, @@ -105,15 +113,19 @@ def _handle_overflow(self, array, dtype): elif isinstance(dtype, Int): sb = 1 << self.dtype.bits sb_limit = 1 << (self.dtype.bits - 1) + def cast_func(x): # recursive if isinstance(x, list): return [cast_func(y) for y in x] # signed integer overflow function: wrap mode - x = x % sb # cap the value to the max value of the bitwidth + x = x % sb # cap the value to the max value of the bitwidth return x if x < sb_limit else x - sb + if isinstance(array, list): - array = [cast_func(x) for x in array] # TODO: this should be tested independently + array = [ + cast_func(x) for x in array + ] # TODO: this should be tested independently else: array = np.vectorize(cast_func)(array) elif isinstance(dtype, UInt): @@ -126,13 +138,15 @@ def cast_func(x): sb_limit = 1 << (self.dtype.bits - 1) array = array.astype(np.float64) array = array * (2**dtype.fracs) + def cast_func(x): # recursive if isinstance(x, list): return [cast_func(y) for y in x] - x = math.trunc(x) % sb # rounds towards zero + x = math.trunc(x) % sb # rounds towards zero # signed integer overflow function: wrap mode return x if x < sb_limit else x - sb + if isinstance(array, list): array = [cast_func(x) for x in array] else: @@ -143,12 +157,14 @@ def cast_func(x): sb = 1 << self.dtype.bits array = array.astype(np.float64) array = array * (2**dtype.fracs) + def cast_func(x): # recursive if isinstance(x, list): return [cast_func(y) for y in x] - x = math.trunc(x) % sb # rounds towards zero + x = math.trunc(x) % sb # rounds towards zero return x + if isinstance(array, list): array = [cast_func(x) for x in array] else: @@ -157,10 +173,10 @@ def cast_func(x): else: raise DTypeError("Type error: unrecognized type: " + str(self.dtype)) return array - def _struct_np_array_to_int(self): pylist = self.np_array.tolist() + # each element is a tuple def to_int(x): if isinstance(x, list): @@ -174,16 +190,17 @@ def to_int(x): msb = (x[byte_idx] & (1 << bit_idx)) > 0 # sign extension if signed and msb: - x[byte_idx] |= ((0xff << bit_idx) & 0xff) + x[byte_idx] |= (0xFF << bit_idx) & 0xFF for i in range(byte_idx + 1, len(x)): - x[i] = 0xff + x[i] = 0xFF # concatenate the tuple # each element is a byte - byte_str = b'' + byte_str = b"" for i in range(len(x)): - byte_str += x[i].to_bytes(1, byteorder='little', signed=False) - value = int.from_bytes(byte_str, byteorder='little', signed=signed) + byte_str += x[i].to_bytes(1, byteorder="little", signed=False) + value = int.from_bytes(byte_str, byteorder="little", signed=signed) return value + pylist = to_int(pylist) if self.dtype.bits <= 64: return np.array(pylist, dtype=np.int64) diff --git a/heterocl/utils.py b/heterocl/utils.py index a6560054..db6387ed 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -235,7 +235,7 @@ def make_anywidth_numpy_array(val, bitwidth, signed): """ shape = val.shape sign_array = val > 0 - avail_bytes = val.itemsize # number of bytes of each element + avail_bytes = val.itemsize # number of bytes of each element # The following code has several steps to convert the numpy array to have # the correct data type in order to create an MLIR constant tensor. # Since MLIR-NumPy Python interface only supports byte-addressable data types, @@ -258,10 +258,7 @@ def make_anywidth_numpy_array(val, bitwidth, signed): decomposed_np_dtype = np.dtype( ( val.dtype, - { - f"f{i}": (np.uint8, i) - for i in range(val.dtype.itemsize) - }, + {f"f{i}": (np.uint8, i) for i in range(val.dtype.itemsize)}, ) ) val = val.view(decomposed_np_dtype) @@ -293,4 +290,4 @@ def make_anywidth_numpy_array(val, bitwidth, signed): val = val.view(np.dtype(new_dtype)) # -> reshape: 6*6*i24 val = val.reshape(shape) - return val \ No newline at end of file + return val From e10a28f301f34c0126dcc9d0c620771bfb945e9b Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 15:23:38 -0400 Subject: [PATCH 12/15] [Lint] Fix lint errors --- heterocl/ast/ir_builder.py | 6 +---- heterocl/module.py | 2 +- heterocl/runtime.py | 47 ++------------------------------------ heterocl/tensor.py | 26 ++++++++++----------- heterocl/utils.py | 5 ++-- 5 files changed, 19 insertions(+), 67 deletions(-) diff --git a/heterocl/ast/ir_builder.py b/heterocl/ast/ir_builder.py index ffa820e7..fae0d970 100644 --- a/heterocl/ast/ir_builder.py +++ b/heterocl/ast/ir_builder.py @@ -7,8 +7,6 @@ # Import MLIR dialects # Naming rule: import dialect as dialect_d -import numpy as np - from hcl_mlir.dialects import ( func as func_d, hcl as hcl_d, @@ -1408,10 +1406,8 @@ def build_bit_reverse_op(self, op: ast.BitReverseOp, ip): def build_constant_tensor_op(self, op: ast.ConstantTensorOp, ip): loc = Location.file(op.loc.filename, op.loc.lineno, 0) dtype = hcl_dtype_to_mlir(op.dtype, signless=True) - shape = op.values.shape if isinstance(op.dtype, (htypes.Int, htypes.UInt)): - signed = isinstance(op.dtype, htypes.Int) - val = make_anywidth_numpy_array(op.values, op.dtype.bits, signed) + val = make_anywidth_numpy_array(op.values, op.dtype.bits) value_attr = DenseElementsAttr.get(val, shape=op.values.shape, type=dtype) else: val = op.values diff --git a/heterocl/module.py b/heterocl/module.py index 0dee554a..17bb2093 100644 --- a/heterocl/module.py +++ b/heterocl/module.py @@ -110,7 +110,7 @@ def __call__(self, *argv): argv[len(op.arguments) + i].np_array = np.pad( argv[len(op.arguments) + i].np_array, pad_shape ) - execute_llvm_backend(self.src, self.name, self.return_num, *argv) + execute_llvm_backend(self.src, self.name, *argv) for res, shape in original_results: slicing = [] for s in shape: diff --git a/heterocl/runtime.py b/heterocl/runtime.py index 341fb658..230f4afe 100644 --- a/heterocl/runtime.py +++ b/heterocl/runtime.py @@ -7,7 +7,7 @@ import subprocess import ctypes import time -import numpy as np +import warnings from hcl_mlir import runtime as rt from .report import parse_xml @@ -15,8 +15,6 @@ # Filter out the warning from numpy when using ctypes array as numpy array. # This is a Python bug, see: # https://stackoverflow.com/questions/4964101/pep-3118-warning-when-using-ctypes-array-as-numpy-array -import warnings - warnings.filterwarnings( "ignore", category=RuntimeWarning, @@ -122,39 +120,7 @@ def execute_fpga_backend(target, shell=True): raise RuntimeError("Not implemented") -def execute_llvm_backend_obsolete(execution_engine, name, return_num, *argv): - """ - - execution_engine: mlir.ExecutionEngine object, created in hcl.build - - name: str, device top-level function name - - return_num: int, the number of return values - - argv: list-like object, a list of input and output variables - """ - if not isinstance(argv, list): - argv = list(argv) - # Unwrap hcl Array to get numpy arrays - argv_np = [arg.unwrap() for arg in argv] - # Extract output arrays - return_args = argv_np[-return_num:] - # Convert output variables from numpy arrays to memref pointers - return_pointers = [] - for arg in return_args: - memref = rt.get_ranked_memref_descriptor(arg) - return_pointers.append(ctypes.pointer(ctypes.pointer(memref))) - # Convert input variables from numpy arrays to memref pointers - arg_pointers = [] - for arg in argv_np[0:-return_num]: - memref = rt.get_ranked_memref_descriptor(arg) - arg_pointers.append(ctypes.pointer(ctypes.pointer(memref))) - # Invoke device top-level function - execution_engine.invoke(name, *return_pointers, *arg_pointers) - # Copy output arrays back - # might be unnecessary - # for i, return_p in enumerate(return_pointers): - # out_array = rt.ranked_memref_to_numpy(return_p[0]) - # np.copyto(argv[-(len(return_args) - i)].np_array, out_array) # problem here - - -def execute_llvm_backend(execution_engine, name, return_num, *argv): +def execute_llvm_backend(execution_engine, name, *argv): """ Execute LLVM backend. Assume all return args have been moved to input args. @@ -166,7 +132,6 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): argv: list-like object a list of input and output variables """ - # TODO: remove return_num if not isinstance(argv, list): argv = list(argv) @@ -178,11 +143,3 @@ def execute_llvm_backend(execution_engine, name, return_num, *argv): arg_pointers.append(ctypes.pointer(ctypes.pointer(memref))) # Invoke device top-level function execution_engine.invoke(name, *arg_pointers) - # for i, arg_p in enumerate(arg_pointers): - # out_array = rt.ranked_memref_to_numpy(arg_p[0]) - # if out_array element type has one byte, - # ranked_memref_to_numpy will automatically unpack it - # if argv[i].np_array.dtype.itemsize == 1: - # np.copyto(argv[i].np_array['f0'], out_array) - # else: - # np.copyto(argv[i].np_array, out_array) # target, source diff --git a/heterocl/tensor.py b/heterocl/tensor.py index 00cbdfbd..cb48b0ea 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -1,8 +1,8 @@ # Copyright HeteroCL authors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -import numpy as np import math +import numpy as np from hcl_mlir.exceptions import DTypeError, APIError, DTypeWarning from .types import dtype_to_str, Int, UInt, Float, Fixed, UFixed @@ -36,19 +36,17 @@ def __init__(self, array, dtype): # with each field being a byte. self.np_array = self._handle_overflow(array, dtype) if isinstance(dtype, (Int, UInt)): - signed = isinstance(dtype, Int) # closest power of 2 bitwidth = 1 << (self.dtype.bits - 1).bit_length() - if bitwidth < 8: - bitwidth = 8 - # this is to be compliant with MLIR's anywidth type representation + bitwidth = max(bitwidth, 8) + # this is to be compliant with MLIR's anywidth int type alignment # e.g. i1-i8 -> int8 # i9-i16 -> int16 # i17-i32 -> int32 # i33-i64 -> int64 # i65-i128 -> int128 # i129-i256 -> int256 - self.np_array = make_anywidth_numpy_array(self.np_array, bitwidth, signed) + self.np_array = make_anywidth_numpy_array(self.np_array, bitwidth) def asnumpy(self): """ @@ -56,6 +54,7 @@ def asnumpy(self): If the bitwidth is wider than 64, the result will be a python list. Otherwise, return a numpy array. """ + # pylint: disable=no-else-return if isinstance(self.dtype, Float): hcl_dtype_str = dtype_to_str(self.dtype) np_dtype = np.dtype(hcl_dtype_str) @@ -65,13 +64,13 @@ def asnumpy(self): if self.dtype.bits > 64: DTypeWarning( f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" - ) + ).warn() return self._struct_np_array_to_int() elif isinstance(self.dtype, UInt): if self.dtype.bits > 64: DTypeWarning( f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" - ) + ).warn() return self._struct_np_array_to_int() # TODO(Niansong): fixed/ufixed does not go through struct_np_array_to_int for now # because a change in IR is needed to support this, leaving it to another PR @@ -79,7 +78,7 @@ def asnumpy(self): if self.dtype.bits > 64: DTypeWarning( f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" - ) + ).warn() # base_array = self._struct_np_array_to_int() # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) @@ -87,7 +86,7 @@ def asnumpy(self): if self.dtype.bits > 64: DTypeWarning( f"The bitwidth of target type is wider than 64 ({self.dtype}), .asnumpy() returns a python list" - ) + ).warn() # base_array = self._struct_np_array_to_int() # return base_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) return self.np_array.astype(np.float64) / float(2 ** (self.dtype.fracs)) @@ -196,16 +195,15 @@ def to_int(x): # concatenate the tuple # each element is a byte byte_str = b"" - for i in range(len(x)): - byte_str += x[i].to_bytes(1, byteorder="little", signed=False) + for byte in x: + byte_str += byte.to_bytes(1, byteorder="little", signed=False) value = int.from_bytes(byte_str, byteorder="little", signed=signed) return value pylist = to_int(pylist) if self.dtype.bits <= 64: return np.array(pylist, dtype=np.int64) - else: - return pylist + return pylist def __repr__(self) -> str: return self.asnumpy().__repr__() diff --git a/heterocl/utils.py b/heterocl/utils.py index db6387ed..b5806ff3 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -217,7 +217,7 @@ def get_max_value(dtype): raise DTypeError(f"Unrecognized data type: {dtype}") -def make_anywidth_numpy_array(val, bitwidth, signed): +def make_anywidth_numpy_array(val, bitwidth): """ Converts a numpy array to any target bitwidth. ---------------- @@ -247,6 +247,8 @@ def make_anywidth_numpy_array(val, bitwidth, signed): # numpy input: 6*6*i64 # 1. Decompose the original i32 or i64 array into a structured array of uint8 # -> decompose: 6*6*8*i8 + # pylint: disable=no-else-return + # I think this if-else makes the code more readable if bitwidth == 1: return np.packbits(val, axis=None, bitorder="little") else: @@ -270,7 +272,6 @@ def make_anywidth_numpy_array(val, bitwidth, signed): new_dtype = np.dtype( { "names": [f"f{i}" for i in range(n_bytes)], - # "formats": ["u1"] * (n_bytes - 1) + (["i1"] if signed else ["u1"]), "formats": ["u1"] * n_bytes, "offsets": list(range(n_bytes)), "itemsize": n_bytes, From a873b892f25ef5bd5bb5fb0541dd6fa4e87855eb Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 15:48:01 -0400 Subject: [PATCH 13/15] [Lint] Upgrade local pylint, fix errors --- heterocl/tensor.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/heterocl/tensor.py b/heterocl/tensor.py index cb48b0ea..3d8e6226 100644 --- a/heterocl/tensor.py +++ b/heterocl/tensor.py @@ -126,7 +126,7 @@ def cast_func(x): cast_func(x) for x in array ] # TODO: this should be tested independently else: - array = np.vectorize(cast_func)(array) + array = np.vectorize(cast_func)(array).astype(np.int64) elif isinstance(dtype, UInt): # Handle overflow sb = 1 << self.dtype.bits @@ -149,8 +149,7 @@ def cast_func(x): if isinstance(array, list): array = [cast_func(x) for x in array] else: - array = np.vectorize(cast_func)(array) - array = array.astype(np.int64) + array = np.vectorize(cast_func)(array).astype(np.int64) elif isinstance(dtype, UFixed): # Handle overflow sb = 1 << self.dtype.bits @@ -167,8 +166,7 @@ def cast_func(x): if isinstance(array, list): array = [cast_func(x) for x in array] else: - array = np.vectorize(cast_func)(array) - array = array.astype(np.int64) + array = np.vectorize(cast_func)(array).astype(np.int64) else: raise DTypeError("Type error: unrecognized type: " + str(self.dtype)) return array From 506349aca8e69c1dfb3c85c0b2ed82552d42667d Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 16:25:27 -0400 Subject: [PATCH 14/15] [Test] Add test_irregular_bitwidth_input --- heterocl/utils.py | 2 +- tests/test_dtype.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/heterocl/utils.py b/heterocl/utils.py index b5806ff3..91418928 100644 --- a/heterocl/utils.py +++ b/heterocl/utils.py @@ -234,7 +234,7 @@ def make_anywidth_numpy_array(val, bitwidth): numpy array with the target bitwidth """ shape = val.shape - sign_array = val > 0 + sign_array = val >= 0 avail_bytes = val.itemsize # number of bytes of each element # The following code has several steps to convert the numpy array to have # the correct data type in order to create an MLIR constant tensor. diff --git a/tests/test_dtype.py b/tests/test_dtype.py index 7ff3a3f5..47439bc5 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -671,5 +671,35 @@ def cast(A): assert False, "test failed, see failed test case above" +def test_irregular_bitwidth_input(): + def test_int(dtype): + hcl.init(dtype) + A = hcl.placeholder((10,), "A", dtype=dtype) + B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B") + s = hcl.create_schedule([A, B]) + f = hcl.build(s) + # A_np = np.random.randint(-10, 10, A.shape) + A_np = np.zeros(A.shape) + A_hcl = hcl.asarray(A_np, dtype=dtype) + B_hcl = hcl.asarray(np.zeros(A.shape), dtype=dtype) + f(A_hcl, B_hcl) + B_np = B_hcl.asnumpy() + if dtype.bits <= 64: + golden = hcl.asarray(A_np + 1, dtype=dtype).asnumpy() + assert np.allclose(golden, B_np) + else: + # B_np is a list + golden = [x + 1 for x in A_np.tolist()] + for res, g in zip(B_np, golden): + if res != g: + print(f"res: {res}, hex: {hex(res)}\n") + print(f"g: {g}, hex: {hex(g)}\n") + assert res == g + + test_dtypes = [hcl.Int(2), hcl.Int(20), hcl.Int(63), hcl.Int(255), hcl.Int(512)] + for dtype in test_dtypes: + test_int(dtype) + + if __name__ == "__main__": pytest.main([__file__]) From acb4e2f58080fe9444987cdb25908f3446bb9506 Mon Sep 17 00:00:00 2001 From: Niansong Zhang Date: Wed, 15 Mar 2023 16:41:02 -0400 Subject: [PATCH 15/15] [Test] Use random input --- tests/test_dtype.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dtype.py b/tests/test_dtype.py index 47439bc5..8100ac82 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -678,8 +678,8 @@ def test_int(dtype): B = hcl.compute(A.shape, lambda *args: A[args] + 1, "B") s = hcl.create_schedule([A, B]) f = hcl.build(s) - # A_np = np.random.randint(-10, 10, A.shape) - A_np = np.zeros(A.shape) + A_np = np.random.randint(-10, 10, A.shape) + # A_np = np.zeros(A.shape) A_hcl = hcl.asarray(A_np, dtype=dtype) B_hcl = hcl.asarray(np.zeros(A.shape), dtype=dtype) f(A_hcl, B_hcl)