@@ -367,9 +367,34 @@ def _aten_bmm(x, y):
 @op(torch.ops.aten.embedding)
 # embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False)
-def _aten_embedding(a, w, padding_idx=-1):
+def _aten_embedding(a, w, padding_idx=-1, scale_grad_by_freq=False, sparse=False):
   return jnp.take(a, w, axis=0)
 
+@op(torch.ops.aten.embedding_renorm_)
+def _aten_embedding_renorm_(weight, indices, max_norm, norm_type):
+  # Adapted from https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/Embedding.cpp
+  unique_indices = jnp.unique(indices)
+
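+  # Compute the norm_type-norm of each referenced (unique) embedding row.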
+  norm = jnp.linalg.norm(
+      _aten_embedding(weight, unique_indices),
+      ord=norm_type,
+      axis=1,
+  )
+
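+  # Positions (within unique_indices) of rows whose norm exceeds max_norm.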
+  indice_idx = jnp.where(norm > max_norm)
+
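+  # Rescaling factor per offending row; the 1e-7 epsilon avoids division
+  # by zero and mirrors the PyTorch reference implementation cited above.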
+  scale = max_norm / (norm[indice_idx] + 1e-7)
+
+  indices_to_update = unique_indices[indice_idx]
+
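+  # JAX arrays are immutable, so .at[...].set(...) returns an updated copy
+  # of weight rather than mutating it in place.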
+  weight = weight.at[indices_to_update].set(
+      weight[indices_to_update] * scale[:, None]
+  )
+  return weight
 
 #- func: _embedding_bag_forward_only(
 # Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False,
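
Note: aten.embedding_renorm_ is the op that torch.nn.functional.embedding dispatches to when max_norm is set. As a rough sketch of the behavior the new lowering implements (the values below are illustrative, not part of the change):

    import jax.numpy as jnp

    weight = jnp.array([[3.0, 4.0], [0.3, 0.4]])  # row 0 has L2 norm 5.0
    indices = jnp.array([0, 0, 1])
    out = _aten_embedding_renorm_(weight, indices, max_norm=1.0, norm_type=2.0)
    # row 0 is scaled down to roughly unit norm; row 1 (norm 0.5) is untouched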