diff --git a/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir b/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir
index 4a7da4befadd..9a537875c6ab 100644
--- a/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir
+++ b/compiler/plugins/target/ROCM/test/config_ukernel_argmax_gfx942.mlir
@@ -25,7 +25,7 @@ func.func @argmax_2d_f32i64(%arg0 : tensor<1x?xf32>) -> tensor<1xi64> attributes
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
 //  CEHCK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_spec<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
 
 // -----
 
@@ -54,7 +54,7 @@ func.func @argmax_4d_unit_parallel_f32i64(%arg0 : tensor<1x1x1x?xf32>) -> tensor
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
 //  CEHCK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_spec<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
 
 // -----
 
@@ -82,7 +82,7 @@ func.func @argmax_none_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor<1xi64>
 // CHECK-LABEL: func @argmax_none_ukernel_enabled(
 //       CHECK: linalg.generic
 //   CHECK-NOT: hal.executable.objects
-//   CHECK-NOT: iree_gpu.ukernel_spec
+//   CHECK-NOT: iree_gpu.ukernel_config
 
 // -----
 
@@ -111,7 +111,7 @@ func.func @argmax_only_argmax_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
 //  CHECK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_spec<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
 
 // -----
 
@@ -140,7 +140,7 @@ func.func @argmax_only_foo_argmax_bar_ukernel_enabled(%arg0 : tensor<1x?xf32>) -
 //       CHECK: linalg.generic
 //  CHECK-SAME: hal.executable.objects = [
 //  CHECK-SAME:   #hal.executable.object<{path = "iree_uk_amdgpu_argmax_f32i64.gfx942.bc", data = dense_resource<iree_uk_amdgpu_argmax_f32i64.gfx942.bc> : vector<{{[0-9]+}}xi8>}>]
-//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_spec<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME:   #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
 
 // -----
 
@@ -168,7 +168,7 @@ func.func @argmax_only_foo_ukernel_enabled(%arg0 : tensor<1x?xf32>) -> tensor<1x
 // CHECK-LABEL: func @argmax_only_foo_ukernel_enabled(
 //       CHECK: linalg.generic
 //   CHECK-NOT: hal.executable.objects
-//   CHECK-NOT: iree_gpu.ukernel_spec
+//   CHECK-NOT: iree_gpu.ukernel_config
 
 // -----
 
@@ -239,4 +239,4 @@ func.func @argmax_2d_f32i64_custom_bitcode(%arg0 : tensor<1x?xf32>) -> tensor<1x
 //  CHECK-SAME:         data = dense<[66, 67, -64, -34, 1, 35, 69, 103, -119, -85, -51, -17]> : tensor<12xi8>
 //  CHECK-SAME:       }>
 //  CHECK-SAME:     ]
-//  CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_spec<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
+//  CHECK-SAME: #iree_gpu.lowering_config<{{.*}}ukernel = #iree_gpu.ukernel_config<name = "iree_uk_amdgpu_argmax_f32i64", def_attrs = {vm.import.module = "rocm"}>
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp
index 796138d55e3f..fd58c29d2654 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPULowerToUKernels.cpp
@@ -43,7 +43,7 @@ matchArgmaxDAGForUKernel(RewriterBase &rewriter, linalg::GenericOp op) {
   if (!loweringConfig) {
     return rewriter.notifyMatchFailure(op, "no lowering_config on this op");
   }
-  IREE::GPU::UKernelSpecAttr ukernelAttr =
+  IREE::GPU::UKernelConfigAttr ukernelAttr =
       IREE::GPU::getUkernelSpec(loweringConfig);
   if (!ukernelAttr) {
     return rewriter.notifyMatchFailure(op, "no ukernel selected for this op");
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_lower_to_ukernels.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_lower_to_ukernels.mlir
index 6a13468a1d29..7acab19f945a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_lower_to_ukernels.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_lower_to_ukernels.mlir
@@ -1,6 +1,6 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-lower-to-ukernels,cse,canonicalize))" %s | FileCheck %s
 
-#config = #iree_gpu.lowering_config<{ukernel = #iree_gpu.ukernel_spec<name = "some_ukernel", def_attrs = {vm.import.module = "rocm"}>}>
+#config = #iree_gpu.lowering_config<{ukernel = #iree_gpu.ukernel_config<name = "some_ukernel", def_attrs = {vm.import.module = "rocm"}>}>
 func.func @argmax_f32i64_with_selected_ukernel(%arg0 : tensor<1x?xf32>) -> tensor<1xi64> attributes {
   hal.executable.target = #hal.executable.target<"rocm", "rocm-hsaco-fb", {ukernels = "all"}>
 } {
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.cpp
index 8ebfba912442..df85e48a7379 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.cpp
@@ -145,9 +145,9 @@ std::optional<SmallVector<int64_t>> getPaddingList(LoweringConfigAttr config) {
   return getIntegerVector(array);
 }
 
-IREE::GPU::UKernelSpecAttr
+IREE::GPU::UKernelConfigAttr
 getUkernelSpec(IREE::GPU::LoweringConfigAttr config) {
-  return config.getAttributes().getAs<IREE::GPU::UKernelSpecAttr>("ukernel");
+  return config.getAttributes().getAs<IREE::GPU::UKernelConfigAttr>("ukernel");
 }
 
 } // namespace mlir::iree_compiler::IREE::GPU
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.h b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.h
index 5bebb64a1b05..b6afde5d4dd4 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/GPULoweringConfigUtils.h
@@ -59,7 +59,8 @@ void setPromotedOperandList(MLIRContext *context,
 /// Helper to retrieve  list of operand to pad.
 std::optional<SmallVector<int64_t>> getPaddingList(LoweringConfigAttr config);
 
-IREE::GPU::UKernelSpecAttr getUkernelSpec(IREE::GPU::LoweringConfigAttr config);
+IREE::GPU::UKernelConfigAttr
+getUkernelSpec(IREE::GPU::LoweringConfigAttr config);
 
 } // namespace mlir::iree_compiler::IREE::GPU
 
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
index 0b1e32fdc362..e4b66bffbd89 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td
@@ -521,12 +521,12 @@ def IREEGPU_LaneIdAttr : AttrDef<IREEGPU_Dialect, "LaneId", [
 }
 
 //===---------------------------------------------------------------------===//
-// iree_gpu.ukernel_spec
+// iree_gpu.ukernel_config
 //===---------------------------------------------------------------------===//
 
-def IREEGPU_UKernelSpecAttr  :
-    AttrDef<IREEGPU_Dialect, "UKernelSpec", []> {
-  let mnemonic = "ukernel_spec";
+def IREEGPU_UKernelConfigAttr  :
+    AttrDef<IREEGPU_Dialect, "UKernelConfig", []> {
+  let mnemonic = "ukernel_config";
   let summary = "An attribute specifying a ukernel that an op can lower to.";
   let description = [{
     An attribute that can be applied to any operation to specify that it has
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
index 1f44bf693a55..fbc0f37a129b 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -2103,14 +2103,11 @@ static LogicalResult
 setArgmaxUkernelConfig(IREE::GPU::TargetAttr target,
                        mlir::FunctionOpInterface entryPoint,
                        linalg::GenericOp op) {
-  // Checks if UKernels are enabled.
-  IREE::GPU::UKernelSpecAttr ukernelSpec = selectUKernelForArgmax(op);
-  if (!ukernelSpec) {
+  IREE::GPU::UKernelConfigAttr ukernelConfig = selectUKernel(op);
+  if (!ukernelConfig) {
     return failure();
   }
 
-  if (failed(isArgmaxOp(op)))
-    return failure();
   SmallVector<unsigned> parallelDims;
   SmallVector<unsigned> reductionDims;
   op.getParallelDims(parallelDims);
@@ -2161,7 +2158,7 @@ setArgmaxUkernelConfig(IREE::GPU::TargetAttr target,
                      b.getI64ArrayAttr(workgroupTileSizes));
   attrs.emplace_back(StringAttr::get(context, "reduction"),
                      b.getI64ArrayAttr(reductionTileSizes));
-  attrs.emplace_back(StringAttr::get(context, "ukernel"), ukernelSpec);
+  attrs.emplace_back(StringAttr::get(context, "ukernel"), ukernelConfig);
   IREE::GPU::setPromotedOperandList(context, attrs, {0, 1});
   auto configDict = DictionaryAttr::get(context, attrs);
   auto loweringConfig = IREE::GPU::LoweringConfigAttr::get(context, configDict);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.cpp
index 1940e8f0b102..2f2861f926cc 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.cpp
@@ -18,7 +18,49 @@ namespace mlir::iree_compiler {
 
 namespace {
 
-constexpr StringLiteral executableObjectsAttrName = "hal.executable.objects";
+// Returns ukernel name "argmax" and an element-type suffix such as "f32i64".
+static std::tuple<std::string, std::string>
+getUKernelNameAndSuffixForArgmax(linalg::GenericOp op) {
+  Value input = op.getDpsInputOperand(0)->get(); // First DPS input.
+  auto inputType = cast<ShapedType>(input.getType());
+  Value index = op.getDpsInitOperand(1)->get(); // Second DPS init operand.
+  auto indexType = cast<ShapedType>(index.getType());
+  return {"argmax", llvm::formatv("{}{}", inputType.getElementType(),
+                                  indexType.getElementType())};
+}
+
+// Returns the ukernel {name, suffix} for `op`; both strings empty if none.
+static std::tuple<std::string, std::string>
+getUKernelNameAndSuffix(Operation *op) {
+  if (auto genericOp = dyn_cast<linalg::GenericOp>(op)) {
+    if (succeeded(isArgmaxOp(genericOp))) {
+      return getUKernelNameAndSuffixForArgmax(genericOp);
+    }
+  }
+  return {}; // Not a recognized op: empty name and suffix.
+}
+
+// Returns the UKernelConfigAttr for `op`, or {} if no ukernel can be used.
+static IREE::GPU::UKernelConfigAttr getUKernelConfig(Operation *op) {
+  MLIRContext *context = op->getContext();
+  auto [name, suffix] = getUKernelNameAndSuffix(op);
+  if (name.empty() || suffix.empty()) {
+    return {}; // No ukernel matches this op.
+  }
+  auto target = IREE::HAL::ExecutableTargetAttr::lookup(op);
+  if (!hasUkernel(target, name)) {
+    return {}; // hasUkernel rejected `name` for this target.
+  }
+  if (isROCMBackend(target)) {
+    auto nameAttr = StringAttr::get(
+        context, llvm::formatv("iree_uk_amdgpu_{}_{}", name, suffix));
+    auto defsAttr = DictionaryAttr::get(
+        context, {{StringAttr::get(context, "vm.import.module"),
+                   StringAttr::get(context, "rocm")}});
+    return IREE::GPU::UKernelConfigAttr::get(context, nameAttr, defsAttr);
+  }
+  return {}; // Only the ROCm branch above produces a config.
+}
 
 // Returns a ExecutableObjectAttr carrying the bitcode for the given ukernel.
 //
@@ -77,7 +119,8 @@ getUKernelBitcode(MLIRContext *context,
 // array attribute. If the parent hal.executable.variant is reached, its objects
 // attribute is returned.
 // Adapted from ExecutableTargetAttr::lookup.
-static ArrayAttr lookUpExecutableObjects(Operation *op) {
+static ArrayAttr lookUpExecutableObjects(Operation *op,
+                                         StringRef executableObjectsAttrName) {
   MLIRContext *context = op->getContext();
   auto attrId = StringAttr::get(context, executableObjectsAttrName);
   while (op) {
@@ -97,56 +140,39 @@ static ArrayAttr lookUpExecutableObjects(Operation *op) {
   return {};
 }
 
-/// Returns the function name and attributes to use for a ukernel with given
-/// `name` and `suffix` on the target described by `targetAttr`.
-static IREE::GPU::UKernelSpecAttr
-getUKernelSpec(StringRef name, StringRef suffix, MLIRContext *context,
-               IREE::HAL::ExecutableTargetAttr targetAttr) {
-  if (isROCMBackend(targetAttr)) {
-    auto nameAttr = StringAttr::get(
-        context, llvm::formatv("iree_uk_amdgpu_{}_{}", name, suffix));
-    auto defsAttr = DictionaryAttr::get(
-        context, {{StringAttr::get(context, "vm.import.module"),
-                   StringAttr::get(context, "rocm")}});
-    return IREE::GPU::UKernelSpecAttr::get(context, nameAttr, defsAttr);
+// Looks up the bitcode object for the ukernel named by `ukernelConfig` and
+// stores it on `op` as a one-element hal.executable.objects array attribute,
+// ready to be hoisted by the HoistExecutableObjects pass.
+// Returns failure, leaving `op` unmodified, if no bitcode object was found.
+static LogicalResult
+ensureUKernelBitcode(Operation *op,
+                     IREE::GPU::UKernelConfigAttr ukernelConfig) {
+  constexpr StringLiteral executableObjectsAttrName = "hal.executable.objects";
+  auto target = IREE::HAL::ExecutableTargetAttr::lookup(op);
+  ArrayAttr sourceExecutableObjects =
+      lookUpExecutableObjects(op, executableObjectsAttrName);
+  MLIRContext *context = op->getContext();
+  IREE::HAL::ExecutableObjectAttr bitcodeObject = getUKernelBitcode(
+      context, target, sourceExecutableObjects, ukernelConfig.getName());
+  if (!bitcodeObject) {
+    return failure();
   }
-  return {};
+  op->setAttr(executableObjectsAttrName,
+              ArrayAttr::get(context, bitcodeObject));
+  return success();
 }
 
 } // namespace
 
-IREE::GPU::UKernelSpecAttr selectUKernelForArgmax(linalg::GenericOp op) {
-  if (failed(isArgmaxOp(op))) {
-    return {};
-  }
-  auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(op);
-  const char ukernelName[] = "argmax";
-  if (!hasUkernel(targetAttr, ukernelName)) {
-    return {};
-  }
-  Value input = op.getDpsInputOperand(0)->get();
-  auto inputType = cast<ShapedType>(input.getType());
-  Value index = op.getDpsInitOperand(1)->get();
-  auto indexType = cast<ShapedType>(index.getType());
-  std::string suffix;
-  llvm::raw_string_ostream(suffix)
-      << inputType.getElementType() << indexType.getElementType();
-  MLIRContext *context = op->getContext();
-  IREE::GPU::UKernelSpecAttr ukernelSpec =
-      getUKernelSpec(ukernelName, suffix, context, targetAttr);
-  if (!ukernelSpec) {
+IREE::GPU::UKernelConfigAttr selectUKernel(Operation *op) {
+  IREE::GPU::UKernelConfigAttr ukernelConfig = getUKernelConfig(op);
+  if (!ukernelConfig) { // No ukernel config could be built for this op.
     return {};
   }
-  auto execTarget = IREE::HAL::ExecutableTargetAttr::lookup(op);
-  ArrayAttr sourceExecutableObjects = lookUpExecutableObjects(op);
-  IREE::HAL::ExecutableObjectAttr bitcodeObject = getUKernelBitcode(
-      context, execTarget, sourceExecutableObjects, ukernelSpec.getName());
-  if (!bitcodeObject) {
+  if (failed(ensureUKernelBitcode(op, ukernelConfig))) { // Bitcode missing.
     return {};
   }
-  op->setAttr(executableObjectsAttrName,
-              ArrayAttr::get(context, bitcodeObject));
-  return ukernelSpec;
+  return ukernelConfig; // Bitcode object is now attached to `op`.
 }
 
 } // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.h b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.h
index 4ed251b36070..cb7fa2abac61 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUSelectUKernels.h
@@ -10,6 +10,6 @@
 
 namespace mlir::iree_compiler {
 
-IREE::GPU::UKernelSpecAttr selectUKernelForArgmax(linalg::GenericOp op);
+IREE::GPU::UKernelConfigAttr selectUKernel(Operation *op);
 
 } // namespace mlir::iree_compiler