Merge branch 'master' into add_efficientnet_presets_0

pkgoogle · Nov 14, 2024 · 6adc1ab · 6adc1ab
2 parents 11023a8 + 0756fb4
commit 6adc1ab
Show file tree

Hide file tree

Showing 76 changed files with 4,442 additions and 638 deletions.
diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
@@ -14,6 +14,7 @@
 from keras_hub.src.layers.modeling.reversible_embedding import (
     ReversibleEmbedding,
 )
+from keras_hub.src.layers.modeling.rms_normalization import RMSNormalization
 from keras_hub.src.layers.modeling.rotary_embedding import RotaryEmbedding
 from keras_hub.src.layers.modeling.sine_position_encoding import (
     SinePositionEncoding,
@@ -51,6 +52,10 @@
 from keras_hub.src.models.resnet.resnet_image_converter import (
     ResNetImageConverter,
 )
+from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator
+from keras_hub.src.models.retinanet.retinanet_image_converter import (
+    RetinaNetImageConverter,
+)
 from keras_hub.src.models.sam.sam_image_converter import SAMImageConverter
 from keras_hub.src.models.sam.sam_mask_decoder import SAMMaskDecoder
 from keras_hub.src.models.sam.sam_prompt_encoder import SAMPromptEncoder

diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
@@ -162,6 +162,11 @@
 )
 from keras_hub.src.models.falcon.falcon_tokenizer import FalconTokenizer
 from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
+from keras_hub.src.models.flux.flux_model import FluxBackbone
+from keras_hub.src.models.flux.flux_text_to_image import FluxTextToImage
+from keras_hub.src.models.flux.flux_text_to_image_preprocessor import (
+    FluxTextToImagePreprocessor,
+)
 from keras_hub.src.models.gemma.gemma_backbone import GemmaBackbone
 from keras_hub.src.models.gemma.gemma_causal_lm import GemmaCausalLM
 from keras_hub.src.models.gemma.gemma_causal_lm_preprocessor import (
@@ -185,6 +190,10 @@
 from keras_hub.src.models.image_classifier_preprocessor import (
     ImageClassifierPreprocessor,
 )
+from keras_hub.src.models.image_object_detector import ImageObjectDetector
+from keras_hub.src.models.image_object_detector_preprocessor import (
+    ImageObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.image_segmenter import ImageSegmenter
 from keras_hub.src.models.image_segmenter_preprocessor import (
     ImageSegmenterPreprocessor,
@@ -252,6 +261,13 @@
 from keras_hub.src.models.resnet.resnet_image_classifier_preprocessor import (
     ResNetImageClassifierPreprocessor,
 )
+from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone
+from keras_hub.src.models.retinanet.retinanet_object_detector import (
+    RetinaNetObjectDetector,
+)
+from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import (
+    RetinaNetObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.roberta.roberta_backbone import RobertaBackbone
 from keras_hub.src.models.roberta.roberta_masked_lm import RobertaMaskedLM
 from keras_hub.src.models.roberta.roberta_masked_lm_preprocessor import (

diff --git a/keras_hub/src/bounding_box/__init__.py b/keras_hub/src/bounding_box/__init__.py
@@ -0,0 +1,2 @@
+# TODO: Once all bounding boxes are moved to the Keras repository, remove the
+# bounding box folder.
diff --git a/keras_hub/src/bounding_box/converters.py b/keras_hub/src/bounding_box/converters.py
@@ -20,29 +20,74 @@ class RequiresImagesException(Exception):
 ALL_AXES = 4
 
 
-def _encode_box_to_deltas(
+def encode_box_to_deltas(
     anchors,
     boxes,
-    anchor_format: str,
-    box_format: str,
+    anchor_format,
+    box_format,
+    encoding_format="center_yxhw",
     variance=None,
     image_shape=None,
 ):
-    """Converts bounding_boxes from `center_yxhw` to delta format."""
+    """Encodes bounding boxes relative to anchors as deltas.
+
+    This function calculates the deltas that represent the difference between
+    bounding boxes and provided anchors. Deltas encode the offsets and scaling
+    factors to apply to anchors to obtain the target boxes.
+
+    Boxes and anchors are first converted to the specified `encoding_format`
+    (defaulting to `center_yxhw`) for consistent delta representation.
+
+    Args:
+        anchors: `Tensors`. Anchor boxes with shape of `(N, 4)` where N is the
+            number of anchors.
+        boxes: `Tensors`. Bounding boxes to encode. Boxes can be of shape
+            `(B, N, 4)` or `(N, 4)`.
+        anchor_format: str. The format of the input `anchors`
+            (e.g., "xyxy", "xywh", etc.).
+        box_format: str. The format of the input `boxes`
+            (e.g., "xyxy", "xywh", etc.).
+        encoding_format: str. The intermediate format to which boxes and anchors
+            are converted before delta calculation. Defaults to "center_yxhw".
+        variance: `List[float]`. A 4-element array/tensor representing variance
+            factors to scale the box deltas. If provided, the calculated deltas
+            are divided by the variance. Defaults to None.
+        image_shape: `Tuple[int]`. The shape of the image (height, width, 3).
+            When using relative bounding box format for `box_format` the
+            `image_shape` is used for normalization.
+    Returns:
+        Encoded box deltas. The return type matches the `encoding_format`.
+
+    Raises:
+        ValueError: If `variance` is not None and its length is not 4.
+        ValueError: If `encoding_format` is not `"center_xywh"` or
+            `"center_yxhw"`.
+
+    """
     if variance is not None:
         variance = ops.convert_to_tensor(variance, "float32")
         var_len = variance.shape[-1]
 
         if var_len != 4:
             raise ValueError(f"`variance` must be length 4, got {variance}")
+
+    if encoding_format not in ["center_xywh", "center_yxhw"]:
+        raise ValueError(
+            "`encoding_format` should be one of 'center_xywh' or 'center_yxhw', "
+            f"got {encoding_format}"
+        )
+
     encoded_anchors = convert_format(
         anchors,
         source=anchor_format,
-        target="center_yxhw",
+        target=encoding_format,
         image_shape=image_shape,
     )
     boxes = convert_format(
-        boxes, source=box_format, target="center_yxhw", image_shape=image_shape
+        boxes,
+        source=box_format,
+        target=encoding_format,
+        image_shape=image_shape,
     )
     anchor_dimensions = ops.maximum(
         encoded_anchors[..., 2:], keras.backend.epsilon()
@@ -61,27 +106,72 @@ def _encode_box_to_deltas(
     return boxes_delta
 
 
-def _decode_deltas_to_boxes(
+def decode_deltas_to_boxes(
     anchors,
     boxes_delta,
-    anchor_format: str,
-    box_format: str,
+    anchor_format,
+    box_format,
+    encoded_format="center_yxhw",
     variance=None,
     image_shape=None,
 ):
-    """Converts bounding_boxes from delta format to `center_yxhw`."""
+    """Converts bounding boxes from delta format to the specified `box_format`.
+
+    This function decodes bounding box deltas relative to anchors to obtain the
+    final bounding box coordinates. The boxes are encoded in a specific
+    `encoded_format` (center_yxhw by default) during the decoding process.
+    This allows flexibility in how the deltas are applied to the anchors.
+
+    Args:
+        anchors: Can be `Tensors` or `Dict[Tensors]` where keys are level
+            indices and values are corresponding anchor boxes.
+            The shape of the array/tensor should be `(N, 4)` where N is the
+            number of anchors.
+        boxes_delta: Can be `Tensors` or `Dict[Tensors]`. Bounding box deltas
+            must have the same type and structure as `anchors`.  The
+            shape of the array/tensor can be `(N, 4)` or `(B, N, 4)` where N is
+            the number of boxes.
+        anchor_format: str. The format of the input `anchors`.
+            (e.g., `"xyxy"`, `"xywh"`, etc.)
+        box_format: str. The desired format for the output boxes.
+            (e.g., `"xyxy"`, `"xywh"`, etc.)
+        encoded_format: str. Raw output format from regression head. Defaults
+            to `"center_yxhw"`.
+        variance: `List[floats]`. A 4-element array/tensor representing
+            variance factors to scale the box deltas. If provided, the deltas
+            are multiplied by the variance before being applied to the anchors.
+            Defaults to None.
+        image_shape:  The shape of the image (height, width).  This is needed
+            if normalization to image size is required when converting between
+            formats. Defaults to None.
+
+    Returns:
+        Decoded box coordinates. The return type matches the `box_format`.
+
+    Raises:
+        ValueError: If `variance` is not None and its length is not 4.
+        ValueError: If `encoded_format` is not `"center_xywh"` or
+            `"center_yxhw"`.
+
+    """
     if variance is not None:
         variance = ops.convert_to_tensor(variance, "float32")
         var_len = variance.shape[-1]
 
         if var_len != 4:
             raise ValueError(f"`variance` must be length 4, got {variance}")
 
+    if encoded_format not in ["center_xywh", "center_yxhw"]:
+        raise ValueError(
+            f"`encoded_format` should be 'center_xywh' or 'center_yxhw', "
+            f"but got '{encoded_format}'."
+        )
+
     def decode_single_level(anchor, box_delta):
         encoded_anchor = convert_format(
             anchor,
             source=anchor_format,
-            target="center_yxhw",
+            target=encoded_format,
             image_shape=image_shape,
         )
         if variance is not None:
@@ -97,7 +187,7 @@ def decode_single_level(anchor, box_delta):
         )
         box = convert_format(
             box,
-            source="center_yxhw",
+            source=encoded_format,
             target=box_format,
             image_shape=image_shape,
         )

diff --git a/keras_hub/src/layers/modeling/rms_normalization.py b/keras_hub/src/layers/modeling/rms_normalization.py
@@ -0,0 +1,34 @@
+import keras
+from keras import ops
+
+from keras_hub.src.api_export import keras_hub_export
+
+
+@keras_hub_export("keras_hub.layers.RMSNormalization")
+class RMSNormalization(keras.layers.Layer):
+    """
+    Root Mean Square (RMS) Normalization layer.
+    This layer divides the input by the RMS of its last axis (a 1e-6 epsilon
+    is added to the mean square) and multiplies by a learned `scale` weight.
+    Args:
+        input_dim: int. Size of the last axis of the input tensor.
+    """
+
+    def __init__(self, input_dim):
+        super().__init__()
+        self.scale = self.add_weight(
+            name="scale", shape=(input_dim,), initializer="ones"
+        )
+
+    def call(self, x):
+        """
+        Applies RMS normalization over the last axis of the input tensor.
+        Args:
+            x: KerasTensor. Input tensor whose last axis has size `input_dim`.
+        Returns:
+            KerasTensor: The RMS-normalized tensor, cast to float and with the
+            same shape as `x`, scaled by the learned `scale` parameter.
+        """
+        x = ops.cast(x, float)
+        rrms = ops.rsqrt(ops.mean(ops.square(x), axis=-1, keepdims=True) + 1e-6)
+        return (x * rrms) * self.scale
diff --git a/keras_hub/src/layers/modeling/transformer_encoder.py b/keras_hub/src/layers/modeling/transformer_encoder.py
@@ -215,7 +215,6 @@ def call(
                 return_attention_scores=return_attention_scores,
                 training=training,
             )
-            return x, attention_scores
         else:
             x = self._self_attention_layer(
                 query=x,

diff --git a/keras_hub/src/layers/preprocessing/image_converter.py b/keras_hub/src/layers/preprocessing/image_converter.py
@@ -164,6 +164,11 @@ def _expand_non_channel_dims(self, value, inputs):
         # If inputs are not a tensor type, return a numpy array.
         # This might happen when running under tf.data.
         if ops.is_tensor(inputs):
+            # The preprocessing decorator moves tensors to the CPU on the torch
+            # backend; they are processed there and converted back to the
+            # appropriate device (potentially GPU) after preprocessing.
+            if keras.backend.backend() == "torch" and self.image_size is None:
+                return ops.expand_dims(value, broadcast_dims).cpu()
             return ops.expand_dims(value, broadcast_dims)
         else:
             return np.expand_dims(value, broadcast_dims)

diff --git a/keras_hub/src/models/clip/__init__.py b/keras_hub/src/models/clip/__init__.py
@@ -0,0 +1,5 @@
+from keras_hub.src.models.clip.clip_backbone import CLIPBackbone
+from keras_hub.src.models.clip.clip_presets import backbone_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, CLIPBackbone)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# TODO: Once all bounding boxes are moved to the Keras repository, remove the
		# bounding box folder.