Merge branch 'keras-team:master' into port_mobilenet
pkgoogle authored Feb 5, 2025
2 parents ef9cb73 + a80ea28 commit fe34e36
Showing 25 changed files with 195 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/scripts/auto-assignment.js
@@ -12,7 +12,7 @@ module.exports = async ({ github, context }) => {
// Is this an issue? If so, assign the issue number. Otherwise, assign the PR number.
if (context.payload.issue) {
//assignee List for issues.
assigneesList = ["SuryanarayanaY", "sachinprasadhs"];
assigneesList = ["mehtamansi29", "sonali-kumari1", "dhantule", "sachinprasadhs"];
issueNumber = context.payload.issue.number;
} else {
//assignee List for PRs.
2 changes: 1 addition & 1 deletion README.md
@@ -78,7 +78,7 @@ print(keras_hub.utils.decode_imagenet_predictions(preds))
Load a Bert model and fine-tune it on IMDb movie reviews:

```python
classifier = keras_hub.models.BertClassifier.from_preset(
classifier = keras_hub.models.TextClassifier.from_preset(
"bert_base_en_uncased",
activation="softmax",
num_classes=2,
10 changes: 10 additions & 0 deletions keras_hub/src/layers/preprocessing/image_converter.py
@@ -98,6 +98,7 @@ def __init__(
scale=None,
offset=None,
crop_to_aspect_ratio=True,
pad_to_aspect_ratio=False,
interpolation="bilinear",
data_format=None,
**kwargs,
@@ -112,12 +113,19 @@

super().__init__(**kwargs)

if crop_to_aspect_ratio and pad_to_aspect_ratio:
raise ValueError(
"Only one of 'crop_to_aspect_ratio' or 'pad_to_aspect_ratio' "
"can be True."
)

# Create the `Resizing` layer here even if it's not being used. That
# allows us to make `image_size` a settable property.
self.resizing = keras.layers.Resizing(
height=image_size[0] if image_size else None,
width=image_size[1] if image_size else None,
crop_to_aspect_ratio=crop_to_aspect_ratio,
pad_to_aspect_ratio=pad_to_aspect_ratio,
interpolation=interpolation,
data_format=data_format,
dtype=self.dtype_policy,
@@ -126,6 +134,7 @@ def __init__(
self.scale = scale
self.offset = offset
self.crop_to_aspect_ratio = crop_to_aspect_ratio
self.pad_to_aspect_ratio = pad_to_aspect_ratio
self.interpolation = interpolation
self.data_format = standardize_data_format(data_format)

@@ -182,6 +191,7 @@ def get_config(self):
"offset": self.offset,
"interpolation": self.interpolation,
"crop_to_aspect_ratio": self.crop_to_aspect_ratio,
"pad_to_aspect_ratio": self.pad_to_aspect_ratio,
}
)
return config
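A minimal usage sketch of the new flag, grounded in the test added below (shapes and scale values here are illustrative, and the import uses the source path shown in this diff): padding to the target aspect ratio instead of center-cropping, plus the new guard that rejects enabling both behaviors at once.

```python
import numpy as np

from keras_hub.src.layers.preprocessing.image_converter import ImageConverter

# Letterbox-pad non-square images to the target aspect ratio, then resize.
converter = ImageConverter(
    image_size=(4, 4),
    scale=1.0 / 255.0,
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=True,
)
images = np.ones((2, 10, 16, 3), dtype="float32") * 128
print(converter(images).shape)  # (2, 4, 4, 3)

# Enabling both behaviors at once now raises a ValueError.
try:
    ImageConverter(
        image_size=(4, 4),
        scale=1.0 / 255.0,
        crop_to_aspect_ratio=True,
        pad_to_aspect_ratio=True,
    )
except ValueError as err:
    print(err)  # "Only one of 'crop_to_aspect_ratio' or 'pad_to_aspect_ratio' ..."
```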
23 changes: 22 additions & 1 deletion keras_hub/src/layers/preprocessing/image_converter_test.py
@@ -1,8 +1,10 @@
import os
import pathlib

import keras
import numpy as np
import pytest
from absl.testing import parameterized
from keras import ops

from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
@@ -33,11 +35,21 @@ def test_unbatched(self):
self.assertAllClose(outputs[:, :, 1], np.ones((4, 4)) * 0.301569)
self.assertAllClose(outputs[:, :, 2], np.ones((4, 4)) * 0.852353)

def test_resize_batch(self):
@parameterized.parameters(
(True, False),
(False, True),
)
@pytest.mark.skipif(
keras.config.backend() == "torch",
reason="disabled until resize is fixed for torch backend",
) # TODO: remove skip after new release with fix of https://github.com/keras-team/keras/pull/20797
def test_resize_batch(self, crop_to_aspect_ratio, pad_to_aspect_ratio):
converter = ImageConverter(
image_size=(4, 4),
scale=(1.0 / 255.0, 0.8 / 255.0, 1.2 / 255.0),
offset=(0.2, -0.1, 0.25),
crop_to_aspect_ratio=crop_to_aspect_ratio,
pad_to_aspect_ratio=pad_to_aspect_ratio,
)
inputs = np.ones((2, 10, 10, 3)) * 128
outputs = converter(inputs)
@@ -46,6 +58,15 @@ def test_resize_batch(self):
self.assertAllClose(outputs[:, :, :, 1], np.ones((2, 4, 4)) * 0.301569)
self.assertAllClose(outputs[:, :, :, 2], np.ones((2, 4, 4)) * 0.852353)

def test_pad_and_crop_to_aspect_ratio(self):
with self.assertRaisesRegex(ValueError, "Only one of"):
_ = ImageConverter(
image_size=(4, 4),
scale=1 / 255.0,
crop_to_aspect_ratio=True,
pad_to_aspect_ratio=True,
)

def test_config(self):
converter = ImageConverter(
image_size=(12, 20),
@@ -148,7 +148,7 @@ def test_config(self):
"vocabulary_size": self.vocabulary_size,
"unselectable_token_ids": unselectable_token_ids,
}
self.assertDictContainsSubset(expected_config, config)
self.assertEqual(config, {**config, **expected_config})

# Test cloned masked_lm_masker can be run.
cloned_masked_lm_masker = MaskedLMMaskGenerator.from_config(config)
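The new assertion reproduces the subset semantics of the removed `assertDictContainsSubset` helper (long deprecated in `unittest` and dropped in recent Python releases): merging the expected items back into the actual config is a no-op exactly when every expected key/value pair is already present. A small illustrative check with made-up keys:

```python
config = {"mask_selection_rate": 0.15, "mask_token_id": 103, "extra_key": 1}
expected_config = {"mask_selection_rate": 0.15, "mask_token_id": 103}

# Equivalent to the old assertDictContainsSubset(expected_config, config):
assert config == {**config, **expected_config}

# A mismatched value breaks the equality, so the assertion would fail.
assert config != {**config, **{"mask_token_id": 0}}
```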
16 changes: 15 additions & 1 deletion keras_hub/src/models/basnet/basnet_presets.py
@@ -1,3 +1,17 @@
"""BASNet model preset configurations."""

basnet_presets = {}
basnet_presets = {
"basnet_duts": {
"metadata": {
"description": (
"BASNet model with a 34-layer ResNet backbone, pre-trained "
"on the DUTS image dataset at a 288x288 resolution. Model "
"training was performed by Hamid Ali "
"(https://github.com/hamidriasat/BASNet)."
),
"params": 108886792,
"path": "basnet",
},
"kaggle_handle": "kaggle://keras/basnet/keras/base1",
},
}
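Once the preset above is published under the listed Kaggle handle, loading it would plausibly look like the sketch below. The task class name comes from the test file later in this diff; treat the `keras_hub.models` export as an assumption rather than a documented example.

```python
import keras_hub

# Load the DUTS-trained BASNet segmenter from the new preset.
segmenter = keras_hub.models.BASNetImageSegmenter.from_preset("basnet_duts")
# Per the metadata above, the weights were trained at 288x288 resolution.
```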
1 change: 0 additions & 1 deletion keras_hub/src/models/basnet/basnet_test.py
@@ -54,7 +54,6 @@ def test_end_to_end_model_predict(self):
output = model.predict(self.images)
self.assertAllEqual(output.shape, (2, 64, 64, 1))

@pytest.mark.skip(reason="disabled until preset's been uploaded to Kaggle")
@pytest.mark.extra_large
def test_all_presets(self):
for preset in BASNetImageSegmenter.presets:
4 changes: 2 additions & 2 deletions keras_hub/src/models/efficientnet/cba_test.py
@@ -10,13 +10,13 @@ def test_same_input_output_shapes(self):
layer = CBABlock(input_filters=32, output_filters=32)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 32))
self.assertEqual(output.shape, (1, 64, 64, 32))
self.assertLen(output, 1)

def test_different_input_output_shapes(self):
inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32")
layer = CBABlock(input_filters=32, output_filters=48)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 48))
self.assertEqual(output.shape, (1, 64, 64, 48))
self.assertLen(output, 1)
12 changes: 6 additions & 6 deletions keras_hub/src/models/efficientnet/efficientnet_backbone_test.py
@@ -87,24 +87,24 @@ def test_feature_pyramid_outputs(self):
height = width = 256
outputs = model(keras.ops.ones(shape=(batch_size, height, width, 3)))
levels = ["P1", "P2", "P3", "P4", "P5"]
self.assertEquals(list(outputs.keys()), levels)
self.assertEquals(
self.assertEqual(list(outputs.keys()), levels)
self.assertEqual(
outputs["P1"].shape,
(batch_size, height // 2**1, width // 2**1, 24),
)
self.assertEquals(
self.assertEqual(
outputs["P2"].shape,
(batch_size, height // 2**2, width // 2**2, 48),
)
self.assertEquals(
self.assertEqual(
outputs["P3"].shape,
(batch_size, height // 2**3, width // 2**3, 64),
)
self.assertEquals(
self.assertEqual(
outputs["P4"].shape,
(batch_size, height // 2**4, width // 2**4, 160),
)
self.assertEquals(
self.assertEqual(
outputs["P5"].shape,
(batch_size, height // 2**5, width // 2**5, 1280),
)
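The expected shapes in these assertions follow directly from each pyramid level's stride (level `Pk` downsamples the input by `2**k`); a quick illustrative recomputation:

```python
height = width = 256
level_channels = {"P1": 24, "P2": 48, "P3": 64, "P4": 160, "P5": 1280}
for level, channels in level_channels.items():
    stride = 2 ** int(level[1:])
    print(level, (height // stride, width // stride, channels))
# P1 (128, 128, 24), P2 (64, 64, 48), ..., P5 (8, 8, 1280)
```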
6 changes: 3 additions & 3 deletions keras_hub/src/models/efficientnet/fusedmbconv_test.py
@@ -10,15 +10,15 @@ def test_same_input_output_shapes(self):
layer = FusedMBConvBlock(input_filters=32, output_filters=32)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 32))
self.assertEqual(output.shape, (1, 64, 64, 32))
self.assertLen(output, 1)

def test_different_input_output_shapes(self):
inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32")
layer = FusedMBConvBlock(input_filters=32, output_filters=48)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 48))
self.assertEqual(output.shape, (1, 64, 64, 48))
self.assertLen(output, 1)

def test_squeeze_excitation_ratio(self):
@@ -28,5 +28,5 @@ def test_squeeze_excitation_ratio(self):
)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 48))
self.assertEqual(output.shape, (1, 64, 64, 48))
self.assertLen(output, 1)
6 changes: 3 additions & 3 deletions keras_hub/src/models/efficientnet/mbconv_test.py
@@ -10,21 +10,21 @@ def test_same_input_output_shapes(self):
layer = MBConvBlock(input_filters=32, output_filters=32)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 32))
self.assertEqual(output.shape, (1, 64, 64, 32))
self.assertLen(output, 1)

def test_different_input_output_shapes(self):
inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32")
layer = MBConvBlock(input_filters=32, output_filters=48)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 48))
self.assertEqual(output.shape, (1, 64, 64, 48))
self.assertLen(output, 1)

def test_squeeze_excitation_ratio(self):
inputs = keras.random.normal(shape=(1, 64, 64, 32), dtype="float32")
layer = MBConvBlock(input_filters=32, output_filters=48, se_ratio=0.25)

output = layer(inputs)
self.assertEquals(output.shape, (1, 64, 64, 48))
self.assertEqual(output.shape, (1, 64, 64, 48))
self.assertLen(output, 1)
9 changes: 6 additions & 3 deletions keras_hub/src/models/falcon/falcon_attention.py
@@ -110,16 +110,19 @@ def call(

attention_scores = ops.einsum("bqnh,bknh->bnqk", query, key)
attention_scores = ops.add(attention_scores, alibi)
attention_scores = (
attention_scores * self.inv_norm_factor
) # [batch_size, num_heads, query_length, kv_length]
# [batch_size, num_heads, query_length, kv_length]
attention_scores = ops.multiply(
attention_scores,
ops.cast(self.inv_norm_factor, self.compute_dtype),
)
attention_scores = self.softmax(
attention_scores, ops.expand_dims(attention_mask, 1)
)
attention_scores = self.attention_dropout(attention_scores)
attention_output = ops.einsum(
"bnqk,bknh->bqnh", attention_scores, value
)

attention_output = ops.reshape(
attention_output,
[batch_size, seq_length, self.num_heads * self.head_dim],
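The rewritten scaling computes the same values; the difference is that the Python-float `inv_norm_factor` is now cast explicitly to the layer's compute dtype instead of relying on the backend's scalar-promotion rules. A standalone sketch of the pattern (shapes, the head size, and the dtype are illustrative):

```python
import math

import numpy as np
from keras import ops

head_dim = 64
inv_norm_factor = 1.0 / math.sqrt(head_dim)
compute_dtype = "float16"

# [batch_size, num_heads, query_length, kv_length], in half precision.
attention_scores = ops.convert_to_tensor(
    np.random.rand(2, 8, 4, 4).astype(compute_dtype)
)
scaled = ops.multiply(
    attention_scores, ops.cast(inv_norm_factor, compute_dtype)
)
print(scaled.dtype)  # float16 (exact repr depends on the backend)
```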
4 changes: 2 additions & 2 deletions keras_hub/src/models/gemma/gemma_backbone.py
@@ -148,10 +148,10 @@ def __init__(

# === Functional Model ===
token_id_input = keras.Input(
shape=(None,), dtype="float32", name="token_ids"
shape=(None,), dtype="int32", name="token_ids"
)
padding_mask_input = keras.Input(
shape=(None,), dtype="float32", name="padding_mask"
shape=(None,), dtype="int32", name="padding_mask"
)
x = self.token_embedding(token_id_input)
x = x * ops.cast(ops.sqrt(hidden_dim), x.dtype)
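With the functional inputs declared as int32, callers feed integer arrays directly. A tiny illustrative sketch using the source-path import; the backbone hyperparameters below are deliberately small made-up values, not a real Gemma configuration:

```python
import numpy as np

from keras_hub.src.models.gemma.gemma_backbone import GemmaBackbone

backbone = GemmaBackbone(
    vocabulary_size=256,
    num_layers=2,
    num_query_heads=4,
    num_key_value_heads=1,
    hidden_dim=32,
    intermediate_dim=64,
    head_dim=8,
)
token_ids = np.random.randint(0, 256, size=(1, 12), dtype="int32")
padding_mask = np.ones((1, 12), dtype="int32")
hidden_states = backbone(
    {"token_ids": token_ids, "padding_mask": padding_mask}
)
print(hidden_states.shape)  # (1, 12, 32)
```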
29 changes: 23 additions & 6 deletions keras_hub/src/models/gpt_neo_x/gpt_neo_x_attention.py
@@ -1,8 +1,11 @@
import math

import keras
from keras import ops

from keras_hub.src.layers.modeling.rotary_embedding import RotaryEmbedding
from keras_hub.src.utils.keras_utils import clone_initializer
from keras_hub.src.utils.keras_utils import has_flash_attention_support


class GPTNeoXAttention(keras.layers.Layer):
@@ -58,6 +61,8 @@ def __init__(
self.bias_initializer = keras.initializers.get(bias_initializer)
self.max_sequence_length = max_sequence_length

self._inv_norm_factor = 1.0 / math.sqrt(self.attn_head_size)

def build(self, input_shape):
self._qkv_dense = keras.layers.EinsumDense(
equation="abc,cde->abde",
@@ -120,14 +125,26 @@ def _masked_softmax(self, attention_scores, attention_mask=None):
def _compute_attention(
self, query, key, value, attention_mask=None, training=None
):
attention_scores = ops.einsum("aecd,abcd->acbe", key, query)
if has_flash_attention_support() and self.dropout == 0:
# Use `dot_product_attention` with Flash Attention support if
# available.
if attention_mask is not None:
attention_mask = ops.expand_dims(attention_mask, axis=1)
attention_mask = ops.cast(attention_mask, dtype="bool")
attention_output = ops.dot_product_attention(
query,
key,
value,
mask=attention_mask,
scale=self._inv_norm_factor,
)
return attention_output

norm_factor = ops.sqrt(
ops.convert_to_tensor(self.attn_head_size, self.compute_dtype)
attention_scores = ops.einsum("aecd,abcd->acbe", key, query)
attention_scores = ops.multiply(
attention_scores,
ops.cast(self._inv_norm_factor, self.compute_dtype),
)

attention_scores /= norm_factor

attention_scores = self._masked_softmax(
attention_scores, attention_mask
)
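For reference, a standalone sketch of the `ops.dot_product_attention` call the new fast path relies on; the layer only takes this branch when `has_flash_attention_support()` reports support and dropout is zero. Shapes below are illustrative, and the op computes standard scaled dot-product attention when no fused kernel is available.

```python
import numpy as np
from keras import ops

batch, q_len, kv_len, num_heads, head_dim = 2, 4, 6, 8, 16
query = ops.convert_to_tensor(
    np.random.rand(batch, q_len, num_heads, head_dim).astype("float32")
)
key = ops.convert_to_tensor(
    np.random.rand(batch, kv_len, num_heads, head_dim).astype("float32")
)
value = ops.convert_to_tensor(
    np.random.rand(batch, kv_len, num_heads, head_dim).astype("float32")
)

# A (batch, q_len, kv_len) padding mask, expanded over the head axis and
# cast to bool, mirroring the handling in the layer above.
mask = ops.expand_dims(
    ops.convert_to_tensor(np.ones((batch, q_len, kv_len), dtype=bool)), axis=1
)

output = ops.dot_product_attention(
    query, key, value, mask=mask, scale=1.0 / np.sqrt(head_dim)
)
print(output.shape)  # (2, 4, 8, 16)
```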
25 changes: 23 additions & 2 deletions keras_hub/src/models/llama/llama_attention.py
@@ -1,8 +1,11 @@
import math

import keras
from keras import ops

from keras_hub.src.layers.modeling.rotary_embedding import RotaryEmbedding
from keras_hub.src.utils.keras_utils import clone_initializer
from keras_hub.src.utils.keras_utils import has_flash_attention_support


class LlamaAttention(keras.layers.Layer):
@@ -43,7 +46,7 @@ def build(self, inputs_shape):
# h = head dim
hidden_dim = inputs_shape[-1]
head_dim = hidden_dim // self.num_query_heads
self._norm_factor = ops.sqrt(ops.cast(head_dim, self.compute_dtype))
self._inv_norm_factor = 1.0 / math.sqrt(head_dim)

self._query_dense = keras.layers.EinsumDense(
equation="bqm,muh->bquh",
@@ -182,9 +185,27 @@ def _masked_softmax(self, attention_scores, attention_mask=None):
return self._softmax(attention_scores)

def _compute_attention(self, query, key, value, attention_mask=None):
if has_flash_attention_support():
# Use `dot_product_attention` with Flash Attention support if
# available.
if attention_mask is not None:
attention_mask = ops.expand_dims(attention_mask, axis=1)
attention_mask = ops.cast(attention_mask, dtype="bool")
attention_output = ops.dot_product_attention(
query,
key,
value,
mask=attention_mask,
scale=self._inv_norm_factor,
)
return attention_output

attention_scores = ops.einsum(self._dot_product_equation, query, key)

attention_scores = attention_scores / self._norm_factor
attention_scores = ops.multiply(
attention_scores,
ops.cast(self._inv_norm_factor, self.compute_dtype),
)
attention_scores = self._masked_softmax(
attention_scores, attention_mask
)
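The non-flash branch here is a pure refactor of the scaling: dividing by a tensor-valued `sqrt(head_dim)` and multiplying by the precomputed Python-float reciprocal produce the same scores up to floating-point noise. A quick illustrative check (head size and shapes are made up):

```python
import math

import numpy as np
from keras import ops

head_dim = 64
scores = ops.convert_to_tensor(np.random.rand(2, 4, 4, 8).astype("float32"))

old_style = scores / ops.sqrt(ops.cast(head_dim, "float32"))
new_style = ops.multiply(
    scores, ops.cast(1.0 / math.sqrt(head_dim), "float32")
)

np.testing.assert_allclose(
    ops.convert_to_numpy(old_style), ops.convert_to_numpy(new_style), rtol=1e-6
)
```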