diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 524e5752b7..540a7d3c73 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -8,9 +8,19 @@ on: jobs: pre-commit: runs-on: ubuntu-20.04 + defaults: + run: + shell: bash -el {0} steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 + - uses: conda-incubator/setup-miniconda@v2 with: python-version: '3.11' - - uses: pre-commit/action@v3.0.0 + activate-environment: "coverage" + channel-priority: "disabled" + environment-file: environment/pre-commit.yml + - uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }} + - run: pre-commit run --show-diff-on-failure --color=always --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19615e0f10..1753b7a3fe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,3 +47,17 @@ repos: args: ["-i"] exclude: "numba_dpex/dpnp_iface" types_or: [c++, c] +- repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + files: ^numba_dpex/experimental + language: system + types: [python] + require_serial: true + args: + [ + "-rn", # Only display messages + "-sn", # Don't display the score + ] diff --git a/environment/pre-commit.yml b/environment/pre-commit.yml new file mode 100644 index 0000000000..3cfb9024b6 --- /dev/null +++ b/environment/pre-commit.yml @@ -0,0 +1,25 @@ +name: dev +channels: + - dppy/label/dev + - numba + - intel + - conda-forge + - nodefaults +dependencies: + - libffi + - gxx_linux-64 + - dpcpp_linux-64 + - numba==0.58* + - dpctl + - dpnp + - dpcpp-llvm-spirv + - opencl_rt + - coverage + - pytest + - pytest-cov + - pytest-xdist + - pexpect + - scikit-build>=0.15* + - cmake>=3.26* + - pre-commit + - pylint diff --git a/numba_dpex/config.py b/numba_dpex/config.py index fef20002a5..ece1ba8ecd 100644 --- a/numba_dpex/config.py +++ b/numba_dpex/config.py @@ -59,10 +59,15 @@ def __getattr__(name): "NUMBA_DPEX_DEBUGINFO", int, config.DEBUGINFO_DEFAULT ) -# Emit LLVM assembly language format(.ll) -DUMP_KERNEL_LLVM = _readenv( - "NUMBA_DPEX_DUMP_KERNEL_LLVM", int, config.DUMP_OPTIMIZED -) +# Emit LLVM IR generated for kernel decorated function +DUMP_KERNEL_LLVM = _readenv("NUMBA_DPEX_DUMP_KERNEL_LLVM", int, 0) + +# Emit LLVM module generated to launch a kernel decorated function +DUMP_KERNEL_LAUNCHER = _readenv("NUMBA_DPEX_DUMP_KERNEL_LAUNCHER", int, 0) + +# Enables debug printf messages inside the kernel launcher module generated for +# a kernel decorated function +DEBUG_KERNEL_LAUNCHER = _readenv("NUMBA_DPEX_DEBUG_KERNEL_LAUNCHER", int, 0) # configs for caching # To see the debug messages for the caching. 
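Illustrative aside, not part of the patch: the three new switches added to numba_dpex/config.py above are plain integer environment variables read through _readenv(). A minimal sketch of turning them on from Python, assuming they are only consulted when numba_dpex is first imported as the surrounding _readenv() calls suggest:

import os

# Set the debug/dump switches before numba_dpex is imported; config.py reads
# the NUMBA_DPEX_* variables once via _readenv() at module import time.
os.environ["NUMBA_DPEX_DUMP_KERNEL_LLVM"] = "1"
os.environ["NUMBA_DPEX_DUMP_KERNEL_LAUNCHER"] = "1"
os.environ["NUMBA_DPEX_DEBUG_KERNEL_LAUNCHER"] = "1"

import numba_dpex  # noqa: E402  imported only after the environment is prepared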
diff --git a/numba_dpex/core/descriptor.py b/numba_dpex/core/descriptor.py index 6d40686289..406f7115f8 100644 --- a/numba_dpex/core/descriptor.py +++ b/numba_dpex/core/descriptor.py @@ -38,11 +38,13 @@ def _inherit_if_not_set(flags, options, name, default=targetconfig._NotSet): class DpexTargetOptions(CPUTargetOptions): experimental = _option_mapping("experimental") release_gil = _option_mapping("release_gil") + no_compile = _option_mapping("no_compile") def finalize(self, flags, options): super().finalize(flags, options) _inherit_if_not_set(flags, options, "experimental", False) _inherit_if_not_set(flags, options, "release_gil", False) + _inherit_if_not_set(flags, options, "no_compile", True) class DpexKernelTarget(TargetDescriptor): diff --git a/numba_dpex/core/exceptions.py b/numba_dpex/core/exceptions.py index 8d5c68366f..aa755c0f77 100644 --- a/numba_dpex/core/exceptions.py +++ b/numba_dpex/core/exceptions.py @@ -215,6 +215,12 @@ def __init__(self, kernel_name, *, usmarray_argnum_list) -> None: f"usm_ndarray arguments {usmarray_args} were not allocated " "on the same queue." ) + else: + self.message = ( + f'Execution queue for kernel "{kernel_name}" could not ' + "be deduced using compute follows data programming model. The " + "kernel has no USMNdArray argument." + ) super().__init__(self.message) diff --git a/numba_dpex/core/parfors/parfor_lowerer.py b/numba_dpex/core/parfors/parfor_lowerer.py index eb8e9e1d87..bcc3ae2082 100644 --- a/numba_dpex/core/parfors/parfor_lowerer.py +++ b/numba_dpex/core/parfors/parfor_lowerer.py @@ -3,9 +3,10 @@ # SPDX-License-Identifier: Apache-2.0 import copy +from collections import namedtuple from llvmlite import ir as llvmir -from numba.core import ir, types +from numba.core import cgutils, ir, types from numba.parfors.parfor import ( find_potential_aliases_parfor, get_parfor_outputs, @@ -27,6 +28,12 @@ create_reduction_remainder_kernel_for_parfor, ) +_KernelArgs = namedtuple( + "_KernelArgs", + ["num_flattened_args", "arg_vals", "arg_types"], +) + + # A global list of kernels to keep the objects alive indefinitely. keep_alive_kernels = [] @@ -84,21 +91,7 @@ class ParforLowerImpl: for a parfor and submits it to a queue. """ - def _get_exec_queue(self, kernel_fn, lowerer): - """Creates a stack variable storing the sycl queue pointer used to - launch the kernel function. - """ - self.kernel_builder = KernelLaunchIRBuilder( - lowerer.context, lowerer.builder, kernel_fn.kernel.addressof_ref() - ) - - # Create a local variable storing a pointer to a DPCTLSyclQueueRef - # pointer. - self.curr_queue = self.kernel_builder.get_queue( - exec_queue=kernel_fn.queue - ) - - def _build_kernel_arglist(self, kernel_fn, lowerer): + def _build_kernel_arglist(self, kernel_fn, lowerer, kernel_builder): """Creates local variables for all the arguments and the argument types that are passes to the kernel function. @@ -110,7 +103,7 @@ AssertionError: If the LLVM IR Value for an argument defined in Numba IR is not found. """ - self.num_flattened_args = 0 + num_flattened_args = 0 # Compute number of args to be passed to the kernel.
Note that the # actual number of kernel arguments is greater than the count of @@ -118,31 +111,35 @@ def _build_kernel_arglist(self, kernel_fn, lowerer): for arg_type in kernel_fn.kernel_arg_types: if isinstance(arg_type, DpnpNdArray): datamodel = dpex_dmm.lookup(arg_type) - self.num_flattened_args += datamodel.flattened_field_count + num_flattened_args += datamodel.flattened_field_count elif arg_type == types.complex64 or arg_type == types.complex128: - self.num_flattened_args += 2 + num_flattened_args += 2 else: - self.num_flattened_args += 1 + num_flattened_args += 1 # Create LLVM values for the kernel args list and kernel arg types list - self.args_list = self.kernel_builder.allocate_kernel_arg_array( - self.num_flattened_args - ) - self.args_ty_list = self.kernel_builder.allocate_kernel_arg_ty_array( - self.num_flattened_args + args_list = kernel_builder.allocate_kernel_arg_array(num_flattened_args) + args_ty_list = kernel_builder.allocate_kernel_arg_ty_array( + num_flattened_args ) callargs_ptrs = [] for arg in kernel_fn.kernel_args: callargs_ptrs.append(_getvar(lowerer, arg)) - self.kernel_builder.populate_kernel_args_and_args_ty_arrays( + kernel_builder.populate_kernel_args_and_args_ty_arrays( kernel_argtys=kernel_fn.kernel_arg_types, callargs_ptrs=callargs_ptrs, - args_list=self.args_list, - args_ty_list=self.args_ty_list, + args_list=args_list, + args_ty_list=args_ty_list, datamodel_mgr=dpex_dmm, ) + return _KernelArgs( + num_flattened_args=num_flattened_args, + arg_vals=args_list, + arg_types=args_ty_list, + ) + def _submit_parfor_kernel( self, lowerer, @@ -156,9 +153,11 @@ def _submit_parfor_kernel( # Ensure that the Python arguments are kept alive for the duration of # the kernel execution keep_alive_kernels.append(kernel_fn.kernel) + kernel_builder = KernelLaunchIRBuilder(lowerer.context, lowerer.builder) + + ptr_to_queue_ref = kernel_builder.get_queue(exec_queue=kernel_fn.queue) + args = self._build_kernel_arglist(kernel_fn, lowerer, kernel_builder) - self._get_exec_queue(kernel_fn, lowerer) - self._build_kernel_arglist(kernel_fn, lowerer) # Create a global range over which to submit the kernel based on the # loop_ranges of the parfor global_range = [] @@ -178,18 +177,26 @@ def _submit_parfor_kernel( local_range = [] + kernel_ref_addr = kernel_fn.kernel.addressof_ref() + kernel_ref = lowerer.builder.inttoptr( + lowerer.context.get_constant(types.uintp, kernel_ref_addr), + cgutils.voidptr_t, + ) + curr_queue_ref = lowerer.builder.load(ptr_to_queue_ref) + # Submit a synchronous kernel - self.kernel_builder.submit_sync_kernel( - self.curr_queue, - self.num_flattened_args, - self.args_list, - self.args_ty_list, - global_range, - local_range, + kernel_builder.submit_sycl_kernel( + sycl_kernel_ref=kernel_ref, + sycl_queue_ref=curr_queue_ref, + total_kernel_args=args.num_flattened_args, + arg_list=args.arg_vals, + arg_ty_list=args.arg_types, + global_range=global_range, + local_range=local_range, ) # At this point we can free the DPCTLSyclQueueRef (curr_queue) - self.kernel_builder.free_queue(sycl_queue_val=self.curr_queue) + kernel_builder.free_queue(ptr_to_sycl_queue_ref=ptr_to_queue_ref) def _submit_reduction_main_parfor_kernel( self, @@ -204,9 +211,11 @@ def _submit_reduction_main_parfor_kernel( # Ensure that the Python arguments are kept alive for the duration of # the kernel execution keep_alive_kernels.append(kernel_fn.kernel) + kernel_builder = KernelLaunchIRBuilder(lowerer.context, lowerer.builder) + + ptr_to_queue_ref = kernel_builder.get_queue(exec_queue=kernel_fn.queue) 
- self._get_exec_queue(kernel_fn, lowerer) - self._build_kernel_arglist(kernel_fn, lowerer) + args = self._build_kernel_arglist(kernel_fn, lowerer, kernel_builder) # Create a global range over which to submit the kernel based on the # loop_ranges of the parfor global_range = [] @@ -220,16 +229,27 @@ def _submit_reduction_main_parfor_kernel( _load_range(lowerer, reductionHelper.work_group_size) ) + kernel_ref_addr = kernel_fn.kernel.addressof_ref() + kernel_ref = lowerer.builder.inttoptr( + lowerer.context.get_constant(types.uintp, kernel_ref_addr), + cgutils.voidptr_t, + ) + curr_queue_ref = lowerer.builder.load(ptr_to_queue_ref) + # Submit a synchronous kernel - self.kernel_builder.submit_sync_kernel( - self.curr_queue, - self.num_flattened_args, - self.args_list, - self.args_ty_list, - global_range, - local_range, + kernel_builder.submit_sycl_kernel( + sycl_kernel_ref=kernel_ref, + sycl_queue_ref=curr_queue_ref, + total_kernel_args=args.num_flattened_args, + arg_list=args.arg_vals, + arg_ty_list=args.arg_types, + global_range=global_range, + local_range=local_range, ) + # At this point we can free the DPCTLSyclQueueRef (curr_queue) + kernel_builder.free_queue(ptr_to_sycl_queue_ref=ptr_to_queue_ref) + def _submit_reduction_remainder_parfor_kernel( self, lowerer, @@ -243,8 +263,11 @@ def _submit_reduction_remainder_parfor_kernel( # the kernel execution keep_alive_kernels.append(kernel_fn.kernel) - self._get_exec_queue(kernel_fn, lowerer) - self._build_kernel_arglist(kernel_fn, lowerer) + kernel_builder = KernelLaunchIRBuilder(lowerer.context, lowerer.builder) + + ptr_to_queue_ref = kernel_builder.get_queue(exec_queue=kernel_fn.queue) + + args = self._build_kernel_arglist(kernel_fn, lowerer, kernel_builder) # Create a global range over which to submit the kernel based on the # loop_ranges of the parfor global_range = [] @@ -255,16 +278,27 @@ def _submit_reduction_remainder_parfor_kernel( local_range = [] + kernel_ref_addr = kernel_fn.kernel.addressof_ref() + kernel_ref = lowerer.builder.inttoptr( + lowerer.context.get_constant(types.uintp, kernel_ref_addr), + cgutils.voidptr_t, + ) + curr_queue_ref = lowerer.builder.load(ptr_to_queue_ref) + # Submit a synchronous kernel - self.kernel_builder.submit_sync_kernel( - self.curr_queue, - self.num_flattened_args, - self.args_list, - self.args_ty_list, - global_range, - local_range, + kernel_builder.submit_sycl_kernel( + sycl_kernel_ref=kernel_ref, + sycl_queue_ref=curr_queue_ref, + total_kernel_args=args.num_flattened_args, + arg_list=args.arg_vals, + arg_ty_list=args.arg_types, + global_range=global_range, + local_range=local_range, ) + # At this point we can free the DPCTLSyclQueueRef (curr_queue) + kernel_builder.free_queue(ptr_to_sycl_queue_ref=ptr_to_queue_ref) + def _reduction_codegen( self, parfor, diff --git a/numba_dpex/core/parfors/reduction_helper.py b/numba_dpex/core/parfors/reduction_helper.py index 31cd8ff618..93d9a147c0 100644 --- a/numba_dpex/core/parfors/reduction_helper.py +++ b/numba_dpex/core/parfors/reduction_helper.py @@ -395,11 +395,7 @@ def work_group_size(self): def copy_final_sum_to_host(self, parfor_kernel): lowerer = self.lowerer - ir_builder = KernelLaunchIRBuilder( - lowerer.context, - lowerer.builder, - parfor_kernel.kernel.addressof_ref(), - ) + ir_builder = KernelLaunchIRBuilder(lowerer.context, lowerer.builder) # Create a local variable storing a pointer to a DPCTLSyclQueueRef # pointer. 
@@ -447,4 +443,4 @@ def copy_final_sum_to_host(self, parfor_kernel): sycl.dpctl_event_wait(builder, event_ref) sycl.dpctl_event_delete(builder, event_ref) - ir_builder.free_queue(sycl_queue_val=curr_queue) + ir_builder.free_queue(ptr_to_sycl_queue_ref=curr_queue) diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c index d6e4798013..91f695ee4e 100644 --- a/numba_dpex/core/runtime/_dpexrt_python.c +++ b/numba_dpex/core/runtime/_dpexrt_python.c @@ -467,9 +467,9 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj, mi->size = nitems * itemsize; mi->external_allocator = ext_alloca; - DPEXRT_DEBUG(drt_debug_print( - "DPEXRT-DEBUG: NRT_MemInfo_init mi=%p external_allocator=%p\n", mi, - ext_alloca)); + DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: NRT_MemInfo_init mi=%p " + "external_allocator=%p at %s, line %d\n", + mi, ext_alloca, __FILE__, __LINE__)); return mi; @@ -797,14 +797,15 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, Py_IncRef(obj); DPEXRT_DEBUG(drt_debug_print( - "DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_from_python.\n")); + "DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_from_python at %s, line %d\n", + __FILE__, __LINE__)); // Check if the PyObject obj has an _array_obj attribute that is of // dpctl.tensor.usm_ndarray type. if (!(arrayobj = PyUSMNdArray_ARRAYOBJ(obj))) { DPEXRT_DEBUG(drt_debug_print( - "DPEXRT-ERROR: PyUSMNdArray_ARRAYOBJ check failed %d\n", __FILE__, - __LINE__)); + "DPEXRT-ERROR: PyUSMNdArray_ARRAYOBJ check failed at %s, line %d\n", + __FILE__, __LINE__)); goto error; } @@ -874,6 +875,11 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, } } + DPEXRT_DEBUG( + drt_debug_print("DPEXRT-DEBUG: Done with unboxing call to " + "DPEXRT_sycl_usm_ndarray_from_python at %s, line %d\n", + __FILE__, __LINE__)); + return 0; error: diff --git a/numba_dpex/core/utils/kernel_launcher.py b/numba_dpex/core/utils/kernel_launcher.py index 79c13254c8..143ba1b3d5 100644 --- a/numba_dpex/core/utils/kernel_launcher.py +++ b/numba_dpex/core/utils/kernel_launcher.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +from llvmlite import ir as llvmir from numba.core import cgutils, types from numba_dpex import utils @@ -20,17 +21,16 @@ class KernelLaunchIRBuilder: for submitting kernels. The LLVM Values that """ - def __init__(self, context, builder, kernel_addr): + def __init__(self, context, builder): """Create a KernelLauncher for the specified kernel. Args: - context: A Numba target context that will be used to generate the code. + context: A Numba target context that will be used to generate the + code. builder: An llvmlite IRBuilder instance used to generate LLVM IR. - kernel_addr: The address of a SYCL kernel. """ self.context = context self.builder = builder - self.kernel_addr = kernel_addr self.rtctx = DpexRTContext(self.context) def _build_nullptr(self): @@ -275,7 +275,7 @@ def get_queue(self, exec_queue): ) return sycl_queue_val - def free_queue(self, sycl_queue_val): + def free_queue(self, ptr_to_sycl_queue_ref): """ Frees the ``DPCTLSyclQueueRef`` pointer that was used to launch the kernels. @@ -283,7 +283,7 @@ def free_queue(self, sycl_queue_val): Args: sycl_queue_val: The SYCL queue pointer to be freed. 
""" - qref = self.builder.load(sycl_queue_val) + qref = self.builder.load(ptr_to_sycl_queue_ref) sycl.dpctl_queue_delete(self.builder, qref) def allocate_kernel_arg_array(self, num_kernel_args): @@ -324,20 +324,24 @@ def allocate_kernel_arg_ty_array(self, num_kernel_args): return args_ty_list def _create_sycl_range(self, idx_range): - """_summary_ + """Allocate a size_t[3] array to store the extents of a sycl::range. + + Sycl supports upto 3-dimensional ranges and a such the array is + statically sized to length three. Only the elements that store an actual + range value are populated based on the size of the idx_range argument. - Args: - idx_range (_type_): _description_ """ intp_t = utils.get_llvm_type(context=self.context, type=types.intp) intp_ptr_t = utils.get_llvm_ptr_type(intp_t) num_dim = len(idx_range) + MAX_SIZE_OF_SYCL_RANGE = 3 + # form the global range - global_range = cgutils.alloca_once( + range_list = cgutils.alloca_once( self.builder, utils.get_llvm_type(context=self.context, type=types.uintp), - size=self.context.get_constant(types.uintp, num_dim), + size=self.context.get_constant(types.uintp, MAX_SIZE_OF_SYCL_RANGE), ) for i in range(num_dim): @@ -350,32 +354,32 @@ def _create_sycl_range(self, idx_range): self.builder.store( rext, self.builder.gep( - global_range, + range_list, [self.context.get_constant(types.uintp, (num_dim - 1) - i)], ), ) - return self.builder.bitcast(global_range, intp_ptr_t) + return self.builder.bitcast(range_list, intp_ptr_t) - def submit_sync_kernel( + def submit_sycl_kernel( self, - sycl_queue_val, + sycl_kernel_ref, + sycl_queue_ref, total_kernel_args, arg_list, arg_ty_list, global_range, - local_range=None, - ): + local_range=[], + wait_before_return=True, + ) -> llvmir.PointerType(llvmir.IntType(8)): """ Submits the kernel to the specified queue, waits. """ + eref = None gr = self._create_sycl_range(global_range) args1 = [ - self.builder.inttoptr( - self.context.get_constant(types.uintp, self.kernel_addr), - utils.get_llvm_type(context=self.context, type=types.voidptr), - ), - self.builder.load(sycl_queue_val), + sycl_kernel_ref, + sycl_queue_ref, arg_list, arg_ty_list, self.context.get_constant(types.uintp, total_kernel_args), @@ -394,11 +398,18 @@ def submit_sync_kernel( args = [] if len(local_range) == 0: args = args1 + args2 - self.rtctx.submit_range(self.builder, *args) + eref = sycl.dpctl_queue_submit_range(self.builder, *args) else: lr = self._create_sycl_range(local_range) args = args1 + [lr] + args2 - self.rtctx.submit_ndrange(self.builder, *args) + eref = sycl.dpctl_queue_submit_ndrange(self.builder, *args) + + if wait_before_return: + sycl.dpctl_event_wait(self.builder, eref) + sycl.dpctl_event_delete(self.builder, eref) + return None + else: + return eref def populate_kernel_args_and_args_ty_arrays( self, diff --git a/numba_dpex/dpctl_iface/libsyclinterface_bindings.py b/numba_dpex/dpctl_iface/libsyclinterface_bindings.py index c515d4a96f..9975e96d0f 100644 --- a/numba_dpex/dpctl_iface/libsyclinterface_bindings.py +++ b/numba_dpex/dpctl_iface/libsyclinterface_bindings.py @@ -30,6 +30,9 @@ def _build_dpctl_function(llvm_module, return_ty, arg_list, func_name): return fn +# --------------------- DpctlSyclQueue functions ------------------------------# + + def dpctl_queue_copy(builder: llvmir.IRBuilder, *args): """Inserts LLVM IR to call DPCTLQueue_Copy to create a copy of a DpctlSyclQueueRef pointer passed in to the function. 
@@ -66,42 +69,255 @@ def dpctl_queue_memcpy(builder: llvmir.IRBuilder, *args): return ret -def dpctl_event_wait(builder: llvmir.IRBuilder, *args): - """Inserts LLVM IR to call DPCTLEvent_Wait.""" +def dpctl_queue_delete(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLQueue_Delete.""" mod = builder.module fn = _build_dpctl_function( llvm_module=mod, - return_ty=llvmir.VoidType(), + return_ty=llvmir.types.VoidType(), arg_list=[cgutils.voidptr_t], - func_name="DPCTLEvent_Wait", + func_name="DPCTLQueue_Delete", ) ret = builder.call(fn, args) return ret -def dpctl_event_delete(builder: llvmir.IRBuilder, *args): - """Inserts LLVM IR to call DPCTLEvent_Delete.""" +def dpctl_queue_get_context(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLQueue_GetContext.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=cgutils.voidptr_t, + arg_list=[cgutils.voidptr_t], + func_name="DPCTLQueue_GetContext", + ) + ret = builder.call(fn, args) + + return ret + + +def dpctl_queue_get_device(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLQueue_GetDevice.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=cgutils.voidptr_t, + arg_list=[cgutils.voidptr_t], + func_name="DPCTLQueue_GetDevice", + ) + ret = builder.call(fn, args) + + return ret + + +def dpctl_queue_submit_range(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLQueue_SubmitRange. + + DPCTLSyclEventRef + DPCTLQueue_SubmitRange( + const DPCTLSyclKernelRef KRef, + const DPCTLSyclQueueRef QRef, + void** Args, + const DPCTLKernelArgType* ArgTypes, + size_t NArgs, + const size_t Range[3], + size_t NRange, + const DPCTLSyclEventRef* DepEvents, + size_t NDepEvents + ); + + """ + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=cgutils.voidptr_t, + arg_list=[ + cgutils.voidptr_t, + cgutils.voidptr_t, + cgutils.voidptr_t.as_pointer(), + cgutils.int32_t.as_pointer(), + llvmir.IntType(64), + llvmir.IntType(64).as_pointer(), + llvmir.IntType(64), + cgutils.voidptr_t, + llvmir.IntType(64), + ], + func_name="DPCTLQueue_SubmitRange", + ) + ret = builder.call(fn, args) + + return ret + + +def dpctl_queue_submit_ndrange(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLQueue_SubmitNDRange. 
+ + DPCTLSyclEventRef + DPCTLQueue_SubmitNDRange( + const DPCTLSyclKernelRef KRef, + const DPCTLSyclQueueRef QRef, + void** Args, + const DPCTLKernelArgType* ArgTypes, + size_t NArgs, + const size_t gRange[3], + const size_t lRange[3], + size_t NDims, + const DPCTLSyclEventRef* DepEvents, + size_t NDepEvents + ); + + """ + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=cgutils.voidptr_t, + arg_list=[ + cgutils.voidptr_t, + cgutils.voidptr_t, + cgutils.voidptr_t.as_pointer(), + cgutils.int32_t.as_pointer(), + llvmir.IntType(64), + llvmir.IntType(64).as_pointer(), + llvmir.IntType(64).as_pointer(), + llvmir.IntType(64), + cgutils.voidptr_t, + llvmir.IntType(64), + ], + func_name="DPCTLQueue_SubmitNDRange", + ) + ret = builder.call(fn, args) + + return ret + + +# --------------------- DpctlSyclDevice functions -----------------------------# + + +def dpctl_device_delete(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLDevice_Delete.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=llvmir.types.VoidType(), + arg_list=[cgutils.voidptr_t], + func_name="DPCTLDevice_Delete", + ) + ret = builder.call(fn, args) + + return ret + + +# --------------------- DpctlSyclKernelBundle functions -----------------------# + + +def dpctl_kernel_bundle_create_from_spirv(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLKernelBundle_CreateFromSpirv.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=cgutils.voidptr_t, + arg_list=[ + cgutils.voidptr_t, + cgutils.voidptr_t, + cgutils.voidptr_t, + llvmir.IntType(64), + cgutils.voidptr_t, + ], + func_name="DPCTLKernelBundle_CreateFromSpirv", + ) + ret = builder.call(fn, args) + + return ret + + +def dpctl_kernel_bundle_delete(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLKernelBundle_Delete.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=llvmir.types.VoidType(), + arg_list=[cgutils.voidptr_t], + func_name="DPCTLKernelBundle_Delete", + ) + ret = builder.call(fn, args) + + return ret + + +def dpctl_kernel_bundle_get_kernel(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLKernelBundle_GetKernel.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=cgutils.voidptr_t, + arg_list=[cgutils.voidptr_t, cgutils.voidptr_t], + func_name="DPCTLKernelBundle_GetKernel", + ) + ret = builder.call(fn, args) + + return ret + + +# --------------------- DpctlSyclKernel functions -----------------------------# + + +def dpctl_kernel_delete(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLKernel_Delete.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=llvmir.types.VoidType(), + arg_list=[cgutils.voidptr_t], + func_name="DPCTLKernel_Delete", + ) + ret = builder.call(fn, args) + + return ret + + +# --------------------- DpctlSyclContext functions ----------------------------# + + +def dpctl_context_delete(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLContext_Delete.""" + mod = builder.module + fn = _build_dpctl_function( + llvm_module=mod, + return_ty=llvmir.types.VoidType(), + arg_list=[cgutils.voidptr_t], + func_name="DPCTLContext_Delete", + ) + ret = builder.call(fn, args) + + return ret + + +# --------------------- DpctlSyclEvent functions ------------------------------# + + +def dpctl_event_wait(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR 
to call DPCTLEvent_Wait.""" mod = builder.module fn = _build_dpctl_function( llvm_module=mod, return_ty=llvmir.VoidType(), arg_list=[cgutils.voidptr_t], - func_name="DPCTLEvent_Delete", + func_name="DPCTLEvent_Wait", ) ret = builder.call(fn, args) return ret -def dpctl_queue_delete(builder: llvmir.IRBuilder, *args): - """Inserts LLVM IR to call DPCTLQueue_Delete.""" +def dpctl_event_delete(builder: llvmir.IRBuilder, *args): + """Inserts LLVM IR to call DPCTLEvent_Delete.""" mod = builder.module fn = _build_dpctl_function( llvm_module=mod, return_ty=llvmir.VoidType(), arg_list=[cgutils.voidptr_t], - func_name="DPCTLQueue_Delete", + func_name="DPCTLEvent_Delete", ) ret = builder.call(fn, args) diff --git a/numba_dpex/experimental/__init__.py b/numba_dpex/experimental/__init__.py new file mode 100644 index 0000000000..589d10df5d --- /dev/null +++ b/numba_dpex/experimental/__init__.py @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Contains experimental features that are meant as engineering preview and not +yet production ready. +""" + +from numba.core.imputils import Registry + +from .decorators import kernel +from .kernel_dispatcher import KernelDispatcher +from .launcher import call_kernel +from .models import * +from .types import KernelDispatcherType + +registry = Registry() +lower_constant = registry.lower_constant + + +@lower_constant(KernelDispatcherType) +def dpex_dispatcher_const(context): + """Dummy lowerer for a KernelDispatcherType object. It is added so that a + KernelDispatcher can be passed as an argument to dpjit. + """ + return context.get_dummy_value() + + +__all__ = ["kernel", "KernelDispatcher", "call_kernel"] diff --git a/numba_dpex/experimental/decorators.py b/numba_dpex/experimental/decorators.py new file mode 100644 index 0000000000..22565eb1cb --- /dev/null +++ b/numba_dpex/experimental/decorators.py @@ -0,0 +1,80 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +""" The set of experimental decorators provided by numba_dpex that are not yet +ready to move to numba_dpex.core. +""" +import inspect + +from numba.core import sigutils + +from .kernel_dispatcher import KernelDispatcher + + +def kernel(func_or_sig=None, debug=False, cache=False, **options): + """A decorator to define a kernel function. + + A kernel function is conceptually equivalent to a SYCL kernel function, and + gets compiled into either an OpenCL or a LevelZero SPIR-V binary kernel. + A kernel decorated Python function has the following restrictions: + + * The function can not return any value. + * All array arguments passed to a kernel should adhere to compute + follows data programming model. + """ + # FIXME: The options need to be evaluated and checked here like it is + # done in numba.core.decorators.jit + + def _kernel_dispatcher(pyfunc, sigs=None): + return KernelDispatcher( + pyfunc=pyfunc, + debug_flags=debug, + enable_cache=cache, + specialization_sigs=sigs, + targetoptions=options, + ) + + if func_or_sig is None: + return _kernel_dispatcher + + if isinstance(func_or_sig, str): + raise NotImplementedError( + "Specifying signatures as string is not yet supported by numba-dpex" + ) + + if isinstance(func_or_sig, list) or sigutils.is_signature(func_or_sig): + # String signatures are not supported as passing usm_ndarray type as + # a string is not possible. 
Numba's sigutils relies on the type being + # available in Numba's `types.__dict__` and dpex types are not + # registered there yet. + if isinstance(func_or_sig, list): + for sig in func_or_sig: + if isinstance(sig, str): + raise NotImplementedError( + "Specifying signatures as string is not yet supported " + "by numba-dpex" + ) + # Specialized signatures can either be a single signature or a list. + # In case only one signature is provided convert it to a list + if not isinstance(func_or_sig, list): + func_or_sig = [func_or_sig] + + def _specialized_kernel_dispatcher(pyfunc): + return KernelDispatcher( + pyfunc=pyfunc, + debug_flags=debug, + enable_cache=cache, + specialization_sigs=func_or_sig, + ) + + return _specialized_kernel_dispatcher + func = func_or_sig + if not inspect.isfunction(func): + raise ValueError( + "Argument passed to the kernel decorator is neither a " + "function object, nor a signature. If you are trying to " + "specialize the kernel that takes a single argument, specify " + "the return type as void explicitly." + ) + return _kernel_dispatcher(func) diff --git a/numba_dpex/experimental/kernel_dispatcher.py b/numba_dpex/experimental/kernel_dispatcher.py new file mode 100644 index 0000000000..9f23274848 --- /dev/null +++ b/numba_dpex/experimental/kernel_dispatcher.py @@ -0,0 +1,346 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Implements a new numba dispatcher class and a compiler class to compile and +call numba_dpex.kernel decorated function. +""" +import functools +from collections import Counter, OrderedDict, namedtuple +from contextlib import ExitStack + +import numba.core.event as ev +from numba.core import errors, sigutils, types, utils +from numba.core.caching import NullCache +from numba.core.compiler import CompileResult +from numba.core.compiler_lock import global_compiler_lock +from numba.core.dispatcher import Dispatcher, _DispatcherBase, _FunctionCompiler +from numba.core.typing.typeof import Purpose, typeof + +from numba_dpex import config, spirv_generator +from numba_dpex.core.descriptor import dpex_kernel_target +from numba_dpex.core.exceptions import ( + ExecutionQueueInferenceError, + UnsupportedKernelArgumentError, +) +from numba_dpex.core.pipelines import kernel_compiler +from numba_dpex.core.types import DpnpNdArray + +_KernelModule = namedtuple("_KernelModule", ["kernel_name", "kernel_bitcode"]) + +_KernelCompileResult = namedtuple( + "_KernelCompileResult", + ["status", "cres_or_error", "entry_point"], +) + + +class _KernelCompiler(_FunctionCompiler): + """A special compiler class used to compile numba_dpex.kernel decorated + functions. + """ + + def check_queue_equivalence_of_args( + self, py_func_name: str, args: [types.Type, ...] + ): + """Evaluates if all DpnpNdArray arguments passed to a kernel function + have the same DpctlSyclQueue type. + + Args: + py_func_name (str): Name of the kernel that is being evaluated + args ([types.Type, ...]): List of numba inferred types for each + argument passed to the kernel + + Raises: + ExecutionQueueInferenceError: If all DpnpNdArray arguments were not + allocated on the same dpctl.SyclQueue + ExecutionQueueInferenceError: If no DpnpNdArray arguments + were passed to the kernel.
+ """ + common_queue = None + + for arg in args: + if isinstance(arg, DpnpNdArray): + if common_queue is None: + common_queue = arg.queue + elif common_queue != arg.queue: + raise ExecutionQueueInferenceError( + kernel_name=py_func_name, usmarray_argnum_list=[] + ) + + if common_queue is None: + raise ExecutionQueueInferenceError( + kernel_name=py_func_name, usmarray_argnum_list=None + ) + + def _compile_to_spirv( + self, kernel_library, kernel_fndesc, kernel_targetctx + ): + kernel_func = kernel_library.get_function(kernel_fndesc.llvm_func_name) + + # Create a spir_kernel wrapper function + kernel_fn = kernel_targetctx.prepare_spir_kernel( + kernel_func, kernel_fndesc.argtypes + ) + + # makes sure that the spir_func is completely inlined into the + # spir_kernel wrapper + kernel_library._optimize_final_module() + # Compiled the LLVM IR to SPIR-V + kernel_spirv_module = spirv_generator.llvm_to_spirv( + kernel_targetctx, + kernel_library._final_module, + kernel_library._final_module.as_bitcode(), + ) + return _KernelModule( + kernel_name=kernel_fn.name, kernel_bitcode=kernel_spirv_module + ) + + def compile(self, args, return_type): + kcres = self._compile_cached(args, return_type) + if kcres.status: + return kcres + + raise kcres.cres_or_error + + def _compile_cached( + self, args, return_type: types.Type + ) -> _KernelCompileResult: + """Compiles the kernel function to bitcode and generates a host-callable + wrapper to submit the kernel to a SYCL queue. + + The LLVM IR generated for the kernel function is available in the + CompileResult objected returned by + numba_dpex.core.pipeline.kernel_compiler.KernelCompiler. + + Once the kernel decorated function is compiled down to LLVM IR, the + following steps are performed: + + a) compile the IR into SPIR-V kernel + b) generate a host callable wrapper function that will create a + sycl::kernel_bundle from the SPIR-V and then submits the + kernel_bundle to a sycl::queue + c) create a cpython_wrapper_function for the host callable wrapper + function. + d) create a cfunc_wrapper_function to make the host callable wrapper + function callable inside another JIT-compiled function. + + Args: + args (tuple(types.Type)): A tuple of numba.core.Type instances each + representing the numba-inferred type of a kernel argument. + + return_type (types.Type): The numba-inferred type of the returned + value from the kernel. Should always be types.NoneType. + + Returns: + CompileResult: A CompileResult object storing the LLVM library for + the host-callable wrapper function. + """ + key = tuple(args), return_type + try: + return _KernelCompileResult(False, self._failed_cache[key], None) + except KeyError: + pass + + try: + kernel_cres: CompileResult = self._compile_core(args, return_type) + + kernel_library = kernel_cres.library + kernel_fndesc = kernel_cres.fndesc + kernel_targetctx = kernel_cres.target_context + + kernel_module = self._compile_to_spirv( + kernel_library, kernel_fndesc, kernel_targetctx + ) + + if config.DUMP_KERNEL_LLVM: + with open( + kernel_cres.fndesc.llvm_func_name + ".ll", + "w", + encoding="UTF-8", + ) as f: + f.write(kernel_cres.library._final_module) + + except errors.TypingError as e: + self._failed_cache[key] = e + return _KernelCompileResult(False, e, None) + + return _KernelCompileResult(True, kernel_cres, kernel_module) + + +class KernelDispatcher(Dispatcher): + """Dispatcher class designed to compile kernel decorated functions. The + dispatcher inherits the Numba Dispatcher class, but has a different + compilation strategy. 
Instead of compiling a kernel decorated function to + an executable binary, the dispatcher compiles it to SPIR-V and then caches + that SPIR-V bitcode. + + FIXME: Fix issues identified by pylint with this class. + https://github.com/IntelPython/numba-dpex/issues/1196 + + """ + + targetdescr = dpex_kernel_target + _fold_args = False + + Dispatcher._impl_kinds["kernel"] = _KernelCompiler + + def __init__( + self, + pyfunc, + debug_flags=None, + compile_flags=None, + specialization_sigs=None, + enable_cache=True, + locals={}, + targetoptions={}, + impl_kind="kernel", + pipeline_class=kernel_compiler.KernelCompiler, + ): + targetoptions["nopython"] = True + targetoptions["experimental"] = True + + self._kernel_name = pyfunc.__name__ + self.typingctx = self.targetdescr.typing_context + self.targetctx = self.targetdescr.target_context + + pysig = utils.pysignature(pyfunc) + arg_count = len(pysig.parameters) + + self.overloads = OrderedDict() + + can_fallback = not targetoptions.get("nopython", False) + + _DispatcherBase.__init__( + self, + arg_count, + pyfunc, + pysig, + can_fallback, + exact_match_required=False, + ) + + functools.update_wrapper(self, pyfunc) + + self.targetoptions = targetoptions + self.locals = locals + self._cache = NullCache() + compiler_class = self._impl_kinds[impl_kind] + self._impl_kind = impl_kind + self._compiler: _KernelCompiler = compiler_class( + pyfunc, self.targetdescr, targetoptions, locals, pipeline_class + ) + self._cache_hits = Counter() + self._cache_misses = Counter() + + self._type = types.Dispatcher(self) + self.typingctx.insert_global(self, self._type) + + # Remember target restriction + self._required_target_backend = targetoptions.get("target_backend") + + def typeof_pyval(self, val): + """ + Resolve the Numba type of Python value *val*. + This is called from numba._dispatcher as a fallback if the native code + cannot decide the type. + """ + # Not going through the resolve_argument_type() indirection + # can save a couple µs. 
+ try: + tp = typeof(val, Purpose.argument) + if isinstance(tp, types.Array) and not isinstance(tp, DpnpNdArray): + raise UnsupportedKernelArgumentError( + type=str(type(val)), value=val + ) + except ValueError: + tp = types.pyobject + else: + if tp is None: + tp = types.pyobject + self._types_active_call.append(tp) + return tp + + def add_overload(self, cres): + args = tuple(cres.signature.args) + self.overloads[args] = cres.entry_point + + def compile(self, sig) -> _KernelCompileResult: + disp = self._get_dispatcher_for_current_target() + if disp is not self: + return disp.compile(sig) + + with ExitStack() as scope: + cres = None + + def cb_compiler(dur): + if cres is not None: + self._callback_add_compiler_timer(dur, cres) + + def cb_llvm(dur): + if cres is not None: + self._callback_add_llvm_timer(dur, cres) + + scope.enter_context( + ev.install_timer("numba:compiler_lock", cb_compiler) + ) + scope.enter_context(ev.install_timer("numba:llvm_lock", cb_llvm)) + scope.enter_context(global_compiler_lock) + + if not self._can_compile: + raise RuntimeError("compilation disabled") + # Use counter to track recursion compilation depth + with self._compiling_counter: + args, return_type = sigutils.normalize_signature(sig) + + try: + self._compiler.check_queue_equivalence_of_args( + self._kernel_name, args + ) + except ExecutionQueueInferenceError as eqie: + raise eqie + + # Don't recompile if signature already exists + existing = self.overloads.get(tuple(args)) + if existing is not None: + return existing + + # TODO: Enable caching + # Add code to enable on disk caching of a binary spirv kernel. + # Refer: https://github.com/IntelPython/numba-dpex/issues/1197 + self._cache_misses[sig] += 1 + ev_details = { + "dispatcher": self, + "args": args, + "return_type": return_type, + } + with ev.trigger_event("numba_dpex:compile", data=ev_details): + try: + kcres: _KernelCompileResult = self._compiler.compile( + args, return_type + ) + except errors.ForceLiteralArg as e: + + def folded(args, kws): + return self._compiler.fold_argument_types( + args, kws + )[1] + + raise e.bind_fold_arguments(folded) + self.add_overload(kcres.cres_or_error) + + # TODO: enable caching of kernel_module + # https://github.com/IntelPython/numba-dpex/issues/1197 + + return kcres.entry_point + + def __getitem__(self, args): + """Square-bracket notation for configuring launch arguments is not + supported. + """ + + raise NotImplementedError + + def __call__(self, *args, **kw_args): + """Functor to launch a kernel.""" + + raise NotImplementedError diff --git a/numba_dpex/experimental/launcher.py b/numba_dpex/experimental/launcher.py new file mode 100644 index 0000000000..ea242e1846 --- /dev/null +++ b/numba_dpex/experimental/launcher.py @@ -0,0 +1,398 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Provides a helper function to call a numba_dpex.kernel decorated function +from either CPython or a numba_dpex.dpjit decorated function. 
+""" + +from collections import namedtuple +from typing import Union + +from llvmlite import ir as llvmir +from numba.core import cgutils, cpu, types +from numba.extending import intrinsic, overload + +from numba_dpex import config, dpjit +from numba_dpex.core.exceptions import UnreachableError +from numba_dpex.core.targets.kernel_target import DpexKernelTargetContext +from numba_dpex.core.types import DpnpNdArray, NdRangeType, RangeType +from numba_dpex.core.utils import kernel_launcher as kl +from numba_dpex.dpctl_iface import libsyclinterface_bindings as sycl +from numba_dpex.experimental.kernel_dispatcher import _KernelModule +from numba_dpex.utils import create_null_ptr + +_KernelArgs = namedtuple( + "_KernelArgs", + [ + "flattened_args_count", + "array_of_kernel_args", + "array_of_kernel_arg_types", + ], +) + +_KernelSubmissionArgs = namedtuple( + "_KernelSubmissionArgs", + [ + "kernel_ref", + "queue_ref", + "kernel_args", + "global_range_extents", + "local_range_extents", + ], +) + +_LLVMIRValuesForIndexSpace = namedtuple( + "_LLVMIRValuesForNdRange", ["global_range_extents", "local_range_extents"] +) + + +class _LaunchTrampolineFunctionBodyGenerator: + """ + Helper class to generate the LLVM IR for the launch_trampoline intrinsic. + """ + + def _get_num_flattened_kernel_args( + self, + kernel_targetctx: DpexKernelTargetContext, + kernel_argtys: tuple[types.Type, ...], + ): + num_flattened_kernel_args = 0 + for arg_type in kernel_argtys: + if isinstance(arg_type, DpnpNdArray): + datamodel = kernel_targetctx.data_model_manager.lookup(arg_type) + num_flattened_kernel_args += datamodel.flattened_field_count + elif arg_type in [types.complex64, types.complex128]: + num_flattened_kernel_args += 2 + else: + num_flattened_kernel_args += 1 + + return num_flattened_kernel_args + + def __init__( + self, + codegen_targetctx: cpu.CPUContext, + kernel_targetctx: DpexKernelTargetContext, + builder: llvmir.IRBuilder, + ): + self._cpu_codegen_targetctx = codegen_targetctx + self._kernel_targetctx = kernel_targetctx + self._builder = builder + self._klbuilder = kl.KernelLaunchIRBuilder(kernel_targetctx, builder) + + if config.DEBUG_KERNEL_LAUNCHER: + cgutils.printf( + self._builder, + "DPEX-DEBUG: Inside the kernel launcher function\n", + ) + + def insert_kernel_bitcode_as_byte_str( + self, kernel_module: _KernelModule + ) -> None: + """Inserts a global constant byte string in the current LLVM module to + store the passed in SPIR-V binary blob. + """ + return self._cpu_codegen_targetctx.insert_const_bytes( + self._builder.module, + bytes=kernel_module.kernel_bitcode, + ) + + def populate_kernel_args_and_argsty_arrays( + self, + kernel_argtys: tuple[types.Type, ...], + kernel_args: [llvmir.Instruction, ...], + ) -> _KernelArgs: + """Allocates an LLVM array value to store each flattened kernel arg and + another LLVM array to store the typeid for each flattened kernel arg. + The arrays are the populated with the LLVM value for each arg. 
+ """ + num_flattened_kernel_args = self._get_num_flattened_kernel_args( + kernel_targetctx=self._kernel_targetctx, kernel_argtys=kernel_argtys + ) + + # Create LLVM values for the kernel args list and kernel arg types list + args_list = self._klbuilder.allocate_kernel_arg_array( + num_flattened_kernel_args + ) + args_ty_list = self._klbuilder.allocate_kernel_arg_ty_array( + num_flattened_kernel_args + ) + kernel_args_ptrs = [] + for arg in kernel_args: + ptr = self._builder.alloca(arg.type) + self._builder.store(arg, ptr) + kernel_args_ptrs.append(ptr) + + # Populate the args_list and the args_ty_list LLVM arrays + self._klbuilder.populate_kernel_args_and_args_ty_arrays( + callargs_ptrs=kernel_args_ptrs, + kernel_argtys=kernel_argtys, + args_list=args_list, + args_ty_list=args_ty_list, + datamodel_mgr=self._kernel_targetctx.data_model_manager, + ) + + if config.DEBUG_KERNEL_LAUNCHER: + cgutils.printf( + self._builder, + "DPEX-DEBUG: Populated kernel args and arg type arrays.\n", + ) + + return _KernelArgs( + flattened_args_count=num_flattened_kernel_args, + array_of_kernel_args=args_list, + array_of_kernel_arg_types=args_ty_list, + ) + + def get_queue_ref_val( + self, + kernel_argtys: [types.Type, ...], + kernel_args: [llvmir.Instruction, ...], + ): + """ + Get the sycl queue from the first DpnpNdArray argument. Prior passes + before lowering make sure that compute-follows-data is enforceable + for a specific call to a kernel. As such, at the stage of lowering + the queue from the first DpnpNdArray argument can be extracted. + """ + + for arg_num, argty in enumerate(kernel_argtys): + if isinstance(argty, DpnpNdArray): + llvm_val = kernel_args[arg_num] + datamodel = self._kernel_targetctx.data_model_manager.lookup( + argty + ) + sycl_queue_attr_pos = datamodel.get_field_position("sycl_queue") + ptr_to_queue_ref = self._builder.extract_value( + llvm_val, sycl_queue_attr_pos + ) + break + + return ptr_to_queue_ref + + def get_kernel(self, kernel_module, kbref): + """Returns the pointer to the sycl::kernel object in a passed in + sycl::kernel_bundle wrapper object. + """ + kernel_name = self._cpu_codegen_targetctx.insert_const_string( + self._builder.module, kernel_module.kernel_name + ) + return sycl.dpctl_kernel_bundle_get_kernel( + self._builder, kbref, kernel_name + ) + + def create_llvm_values_for_index_space( + self, + indexer_argty: Union[RangeType, NdRangeType], + index_space_arg: llvmir.BaseStructType, + ) -> _LLVMIRValuesForIndexSpace: + """Returns a list of LLVM IR Values that hold the unboxed extents of a + Python Range or NdRange object. 
+ """ + ndim = indexer_argty.ndim + grange_extents = [] + lrange_extents = [] + datamodel = self._kernel_targetctx.data_model_manager.lookup( + indexer_argty + ) + + if isinstance(indexer_argty, RangeType): + for dim_num in range(ndim): + dim_pos = datamodel.get_field_position("dim" + str(dim_num)) + grange_extents.append( + self._builder.extract_value(index_space_arg, dim_pos) + ) + elif isinstance(indexer_argty, NdRangeType): + for dim_num in range(ndim): + gdim_pos = datamodel.get_field_position("gdim" + str(dim_num)) + grange_extents.append( + self._builder.extract_value(index_space_arg, gdim_pos) + ) + ldim_pos = datamodel.get_field_position("ldim" + str(dim_num)) + lrange_extents.append( + self._builder.extract_value(index_space_arg, ldim_pos) + ) + else: + raise UnreachableError + + return _LLVMIRValuesForIndexSpace( + global_range_extents=grange_extents, + local_range_extents=lrange_extents, + ) + + def create_kernel_bundle_from_spirv( + self, + queue_ref: llvmir.PointerType, + kernel_bc: llvmir.Constant, + kernel_bc_size_in_bytes: int, + ) -> llvmir.CallInstr: + """Calls DPCTLKernelBundle_CreateFromSpirv to create an opaque pointer + to a sycl::kernel_bundle from the SPIR-V generated for a kernel. + """ + dref = sycl.dpctl_queue_get_device(self._builder, queue_ref) + cref = sycl.dpctl_queue_get_context(self._builder, queue_ref) + args = [ + cref, + dref, + kernel_bc, + llvmir.Constant(llvmir.IntType(64), kernel_bc_size_in_bytes), + self._builder.load( + create_null_ptr(self._builder, self._cpu_codegen_targetctx) + ), + ] + kbref = sycl.dpctl_kernel_bundle_create_from_spirv(self._builder, *args) + sycl.dpctl_context_delete(self._builder, cref) + sycl.dpctl_device_delete(self._builder, dref) + + if config.DEBUG_KERNEL_LAUNCHER: + cgutils.printf( + self._builder, + "DPEX-DEBUG: Generated kernel_bundle from SPIR-V.\n", + ) + + return kbref + + def submit_and_wait(self, submit_call_args: _KernelSubmissionArgs) -> None: + """Generates LLVM IR CallInst to submit a kernel to specified SYCL queue + and then call DPCTLEvent_Wait on the returned event. + """ + if config.DEBUG_KERNEL_LAUNCHER: + cgutils.printf( + self._builder, "DPEX-DEBUG: Submit sync range kernel.\n" + ) + + eref = self._klbuilder.submit_sycl_kernel( + sycl_kernel_ref=submit_call_args.kernel_ref, + sycl_queue_ref=submit_call_args.queue_ref, + total_kernel_args=submit_call_args.kernel_args.flattened_args_count, + arg_list=submit_call_args.kernel_args.array_of_kernel_args, + arg_ty_list=submit_call_args.kernel_args.array_of_kernel_arg_types, + global_range=submit_call_args.global_range_extents, + local_range=submit_call_args.local_range_extents, + wait_before_return=False, + ) + if config.DEBUG_KERNEL_LAUNCHER: + cgutils.printf(self._builder, "DPEX-DEBUG: Wait on event.\n") + + sycl.dpctl_event_wait(self._builder, eref) + sycl.dpctl_event_delete(self._builder, eref) + + def cleanup( + self, + kernel_ref: llvmir.Instruction, + kernel_bundle_ref: llvmir.Instruction, + ) -> None: + """Generates calls to free up temporary resources that were allocated in + the launch_trampoline body. + """ + # Delete the kernel ref + sycl.dpctl_kernel_delete(self._builder, kernel_ref) + # Delete the kernel bundle pointer + sycl.dpctl_kernel_bundle_delete(self._builder, kernel_bundle_ref) + + +@intrinsic(target="cpu") +def intrin_launch_trampoline( + typingctx, kernel_fn, index_space, kernel_args # pylint: disable=W0613 +): + """Generates the body of the launch_trampoline overload. 
+ + The intrinsic first compiles the kernel function to SPIRV, and then to a + sycl kernel bundle. The arguments to the kernel are also packed into + flattened arrays and the sycl queue to which the kernel will be submitted + extracted from the args. Finally, the actual kernel is extracted from the + kernel bundle and submitted to the sycl queue. + """ + kernel_args_list = list(kernel_args) + # signature of this intrinsic + sig = types.void(kernel_fn, index_space, kernel_args) + # signature of the kernel_fn + kernel_sig = types.void(*kernel_args_list) + kmodule: _KernelModule = kernel_fn.dispatcher.compile(kernel_sig) + kernel_targetctx = kernel_fn.dispatcher.targetctx + + def codegen(cgctx, builder, sig, llargs): + kernel_argtys = kernel_sig.args + kernel_args_unpacked = [] + for pos in range(len(kernel_args)): + kernel_args_unpacked.append(builder.extract_value(llargs[2], pos)) + + fn_body_gen = _LaunchTrampolineFunctionBodyGenerator( + codegen_targetctx=cgctx, + kernel_targetctx=kernel_targetctx, + builder=builder, + ) + + kernel_bc_byte_str = fn_body_gen.insert_kernel_bitcode_as_byte_str( + kmodule + ) + + populated_kernel_args = ( + fn_body_gen.populate_kernel_args_and_argsty_arrays( + kernel_argtys, kernel_args_unpacked + ) + ) + + qref = fn_body_gen.get_queue_ref_val( + kernel_argtys=kernel_argtys, + kernel_args=kernel_args_unpacked, + ) + + kbref = fn_body_gen.create_kernel_bundle_from_spirv( + queue_ref=qref, + kernel_bc=kernel_bc_byte_str, + kernel_bc_size_in_bytes=len(kmodule.kernel_bitcode), + ) + + kref = fn_body_gen.get_kernel(kmodule, kbref) + + index_space_values = fn_body_gen.create_llvm_values_for_index_space( + indexer_argty=sig.args[1], + index_space_arg=llargs[1], + ) + + submit_call_args = _KernelSubmissionArgs( + kernel_ref=kref, + queue_ref=qref, + kernel_args=populated_kernel_args, + global_range_extents=index_space_values.global_range_extents, + local_range_extents=index_space_values.local_range_extents, + ) + + fn_body_gen.submit_and_wait(submit_call_args) + + fn_body_gen.cleanup(kernel_bundle_ref=kbref, kernel_ref=kref) + + return sig, codegen + + +# pylint: disable=W0613 +def _launch_trampoline(kernel_fn, index_space, *kernel_args): + pass + + +@overload(_launch_trampoline, target="cpu") +def _ol_launch_trampoline(kernel_fn, index_space, *kernel_args): + def impl(kernel_fn, index_space, *kernel_args): + intrin_launch_trampoline( # pylint: disable=E1120 + kernel_fn, index_space, kernel_args + ) + + return impl + + +@dpjit +def call_kernel(kernel_fn, index_space, *kernel_args): + """Calls a numba_dpex.kernel decorated function from CPython or from another + dpjit function. + + Args: + kernel_fn (numba_dpex.experimental.KernelDispatcher): A + numba_dpex.kernel decorated function that is compiled to a + KernelDispatcher by numba_dpex. + index_space (Range | NdRange): A numba_dpex.Range or numba_dpex.NdRange + type object that specifies the index space for the kernel. + kernel_args : List of objects that are passed to the numba_dpex.kernel + decorated function. + """ + _launch_trampoline(kernel_fn, index_space, *kernel_args) diff --git a/numba_dpex/experimental/models.py b/numba_dpex/experimental/models.py new file mode 100644 index 0000000000..588d44ed19 --- /dev/null +++ b/numba_dpex/experimental/models.py @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Provides Numba datamodel for the numba_dpex types introduced in the +numba_dpex.experimental module. 
+""" + +from numba.core.datamodel import models +from numba.core.extending import register_model + +from numba_dpex.core.datamodel.models import dpex_data_model_manager as dmm + +from .types import KernelDispatcherType + +# Register the types and datamodel in the DpexKernelTargetContext +dmm.register(KernelDispatcherType, models.OpaqueModel) + +# Register the types and datamodel in the DpexTargetContext +register_model(KernelDispatcherType)(models.OpaqueModel) diff --git a/numba_dpex/experimental/types.py b/numba_dpex/experimental/types.py new file mode 100644 index 0000000000..73c901e3cb --- /dev/null +++ b/numba_dpex/experimental/types.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +"""Experimental types that will eventually move to numba_dpex.core.types +""" + +from numba.core import types + + +class KernelDispatcherType(types.Dispatcher): + """The type of KernelDispatcher dispatchers""" + + def cast_python_value(self, args): + raise NotImplementedError diff --git a/numba_dpex/tests/experimental/__init__.py b/numba_dpex/tests/experimental/__init__.py new file mode 100644 index 0000000000..3a217e6325 --- /dev/null +++ b/numba_dpex/tests/experimental/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/numba_dpex/tests/experimental/test_exec_queue_inference.py b/numba_dpex/tests/experimental/test_exec_queue_inference.py new file mode 100644 index 0000000000..35dc300ae0 --- /dev/null +++ b/numba_dpex/tests/experimental/test_exec_queue_inference.py @@ -0,0 +1,91 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + + +import dpctl +import dpnp +import pytest + +import numba_dpex.experimental as exp_dpex +from numba_dpex import Range +from numba_dpex.core.exceptions import ExecutionQueueInferenceError + + +@exp_dpex.kernel( + release_gil=False, + no_compile=True, + no_cpython_wrapper=True, + no_cfunc_wrapper=True, +) +def add(a, b, c): + c[0] = b[0] + a[0] + + +def test_successful_execution_queue_inference(): + """ + Tests if KernelDispatcher successfully infers the execution queue for the + kernel. + """ + + q = dpctl.SyclQueue() + a = dpnp.ones(100, sycl_queue=q) + b = dpnp.ones_like(a, sycl_queue=q) + c = dpnp.zeros_like(a, sycl_queue=q) + r = Range(100) + + # FIXME: This test fails unexpectedly if the NUMBA_CAPTURED_ERRORS is set + # to "new_style". + # Refer: https://github.com/IntelPython/numba-dpex/issues/1195 + try: + exp_dpex.call_kernel(add, r, a, b, c) + except: + pytest.fail("Unexpected error when calling kernel") + + assert c[0] == b[0] + a[0] + + +def test_execution_queue_inference_error(): + """ + Tests if KernelDispatcher successfully raised ExecutionQueueInferenceError + when dpnp.ndarray arguments do not share the same dpctl.SyclQueue + instance. 
+ """ + + q1 = dpctl.SyclQueue() + q2 = dpctl.SyclQueue() + a = dpnp.ones(100, sycl_queue=q1) + b = dpnp.ones_like(a, sycl_queue=q2) + c = dpnp.zeros_like(a, sycl_queue=q1) + r = Range(100) + + from numba.core import config + + current_captured_error_style = config.CAPTURED_ERRORS + config.CAPTURED_ERRORS = "new_style" + + with pytest.raises(ExecutionQueueInferenceError): + exp_dpex.call_kernel(add, r, a, b, c) + + config.CAPTURED_ERRORS = current_captured_error_style + + +def test_error_when_no_array_args(): + """ + Tests if KernelDispatcher successfully raised ExecutionQueueInferenceError + when no dpnp.ndarray arguments were passed to a kernel. + """ + a = 1 + b = 2 + c = 3 + r = Range(100) + + from numba.core import config + + current_captured_error_style = config.CAPTURED_ERRORS + config.CAPTURED_ERRORS = "new_style" + + with pytest.raises(ExecutionQueueInferenceError): + exp_dpex.call_kernel(add, r, a, b, c) + + config.CAPTURED_ERRORS = current_captured_error_style diff --git a/numba_dpex/tests/experimental/test_kernel_dispatcher.py b/numba_dpex/tests/experimental/test_kernel_dispatcher.py new file mode 100644 index 0000000000..abda1e1ed3 --- /dev/null +++ b/numba_dpex/tests/experimental/test_kernel_dispatcher.py @@ -0,0 +1,105 @@ +# SPDX-FileCopyrightText: 2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +import dpctl +import dpnp + +import numba_dpex.experimental as exp_dpex +from numba_dpex import NdRange, Range, dpjit + + +@exp_dpex.kernel( + release_gil=False, + no_compile=True, + no_cpython_wrapper=True, + no_cfunc_wrapper=True, +) +def add(a, b, c): + c[0] = b[0] + a[0] + + +@exp_dpex.kernel( + release_gil=False, + no_compile=True, + no_cpython_wrapper=True, + no_cfunc_wrapper=True, +) +def sq(a, b): + a[0] = b[0] * b[0] + + +def test_call_kernel_from_cpython(): + """ + Tests if we can call a kernel function from CPython using the call_kernel + dpjit function. + """ + + q = dpctl.SyclQueue() + a = dpnp.ones(100, sycl_queue=q) + b = dpnp.ones_like(a, sycl_queue=q) + c = dpnp.zeros_like(a, sycl_queue=q) + r = Range(100) + ndr = NdRange(global_size=(100,), local_size=(1,)) + + exp_dpex.call_kernel(add, r, a, b, c) + + assert c[0] == b[0] + a[0] + + exp_dpex.call_kernel(add, ndr, a, b, c) + + assert c[0] == b[0] + a[0] + + +def test_call_kernel_from_dpjit(): + """ + Tests if we can call a kernel function from a dpjit function using the + call_kernel dpjit function. + """ + + @dpjit + def range_kernel_caller(q, a, b, c): + r = Range(100) + exp_dpex.call_kernel(add, r, a, b, c) + return c + + @dpjit + def ndrange_kernel_caller(q, a, b, c): + gr = Range(100) + lr = Range(1) + ndr = NdRange(gr, lr) + exp_dpex.call_kernel(add, ndr, a, b, c) + return c + + q = dpctl.SyclQueue() + a = dpnp.ones(100, sycl_queue=q) + b = dpnp.ones_like(a, sycl_queue=q) + c = dpnp.zeros_like(a, sycl_queue=q) + + range_kernel_caller(q, a, b, c) + + assert c[0] == b[0] + a[0] + + ndrange_kernel_caller(q, a, b, c) + + assert c[0] == b[0] + a[0] + + +def test_call_multiple_kernels(): + """ + Tests if the call_kernel dpjit function supports calling different types of + kernel with different number of arguments. 
+ """ + q = dpctl.SyclQueue() + a = dpnp.ones(100, sycl_queue=q) + b = dpnp.ones_like(a, sycl_queue=q) + c = dpnp.zeros_like(a, sycl_queue=q) + r = Range(100) + + exp_dpex.call_kernel(add, r, a, b, c) + + assert c[0] == b[0] + a[0] + + exp_dpex.call_kernel(sq, r, a, c) + + assert a[0] == c[0] * c[0] diff --git a/pyproject.toml b/pyproject.toml index fc81e2609f..7cd918c47b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,11 @@ style = "pep440" versionfile_source = "numba_dpex/_version.py" parentdir_prefix = "" +[tool.pylint] +disable = [ + "fixme" +] + [build-system] requires = [ "setuptools>=42",