Skip to content

Commit

Permalink
v0.6.3: fix macos inline kernel bug
Browse files Browse the repository at this point in the history
  • Loading branch information
FindDefinition committed Aug 18, 2024
1 parent 1f1a18a commit cf3edc9
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 16 deletions.
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# Changelog
## [0.6.2] - 2024-08-27
## [0.6.3] - 2024-08-18
### Fixed
- fix bug in inline kernels on macOS

## [0.6.2] - 2024-08-17
### Fixed
- fix mac os bug

## [0.6.1] - 2024-08-27
## [0.6.1] - 2024-08-17
### Changed
- debug macos ci

Expand Down
31 changes: 23 additions & 8 deletions cumm/inliner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,15 @@ def type_conversion_code(self, obj, src_name: str, tgt_name: str, user_arg: Opti
# if we use ten.data_ptr(), the result will be
# MTLBuffer + data_offset, which will cause
# segfault.
tv_dtype_str = _cached_get_torch_dtype_to_tv_str()[obj.dtype]
itemsize = obj.itemsize
# storage_offset() is counted in elements, not bytes, so it must be
# multiplied by itemsize when a raw device pointer is used.
res.extend([
f"__{tgt_name}_tmp0 = {src_name}",
f"assert __{tgt_name}_tmp0.dtype == {obj.dtype}",
f"{tgt_name} = (EMPTY_TENSOR, kDevicePointer, ",
f" __{tgt_name}_tmp0.untyped_storage().data_ptr(), __{tgt_name}_tmp0.storage_offset())",
f" __{tgt_name}_tmp0.untyped_storage().data_ptr(), __{tgt_name}_tmp0.storage_offset() * {itemsize})",
])
else:
res.extend([
Expand All @@ -301,6 +305,7 @@ def type_conversion_code(self, obj, src_name: str, tgt_name: str, user_arg: Opti
"import torch",
"from cumm import tensorview as tv",
"kDevicePointer = tv._NVRTCModule.kDevicePointer",
# "kTensor = tv._NVRTCModule.kTensor",
"EMPTY_TENSOR = tv.Tensor()",
]
else:
Expand Down Expand Up @@ -400,15 +405,25 @@ def type_conversion_code(self, obj, src_name: str, tgt_name: str, user_arg: Opti
raise NotImplementedError
assert user_arg is not None and isinstance(user_arg, _NVRTCInlineParams)
if user_arg.unchecked_mode:
res.extend([
f"__{tgt_name}_tmp0 = {src_name}",
# f"assert isinstance(__{tgt_name}_tmp0, {obj_type_str})",
f"{tgt_name} = (tv.full([1], __{tgt_name}_tmp0, {tv_dtype}), kScalar, ",
f" 0, 0)",
])
if isinstance(obj, bool):
# bools are passed as function constants (only used on Apple Metal)
res.extend([
f"__{tgt_name}_tmp0 = {src_name}",
f"assert isinstance(__{tgt_name}_tmp0, bool)",
f"{tgt_name} = (tv.full([1], __{tgt_name}_tmp0, tv.uint8), kConstant, ",
f" 0, 0)",
])
else:
res.extend([
f"__{tgt_name}_tmp0 = {src_name}",
# f"assert isinstance(__{tgt_name}_tmp0, {obj_type_str})",
f"{tgt_name} = (tv.full([1], __{tgt_name}_tmp0, {tv_dtype}), kScalar, ",
f" 0, 0)",
])
return "\n".join(res), [
"from cumm import tensorview as tv",
"kScalar = tv._NVRTCModule.kScalar"
"kScalar = tv._NVRTCModule.kScalar",
"kConstant = tv._NVRTCModule.kConstant"
]
else:
return f"{tgt_name} = {src_name}", []
Expand Down
5 changes: 0 additions & 5 deletions cumm/nvrtc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,12 +735,7 @@ def __init__(self,
self.name_to_meta = self.params.name_to_meta

def load(self):
import llvmlite.binding as llvm
_lazy_load_llvm()
# use clang++ to get ir
opts = self.params.opts
_lazy_load_lib_for_llvm(self.params.libraries,
self.params.libpaths)
with tempfile.TemporaryDirectory() as fdir:
inc_dir = Path(fdir) / "include"
for k, v in self.params.headers.items():
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.6.2
0.6.3

0 comments on commit cf3edc9

Please sign in to comment.