keras-team
diff --git a/benchmarks/model_benchmark/image_classification_benchmark.py b/benchmarks/model_benchmark/image_classification_benchmark.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/demo_custom_layer_backend_agnostic.py b/examples/demo_custom_layer_backend_agnostic.py
Lines changed: 0 additions & 1 deletion
diff --git a/examples/demo_custom_torch_workflow.py b/examples/demo_custom_torch_workflow.py
Lines changed: 2 additions & 1 deletion
diff --git a/examples/demo_jax_distributed.py b/examples/demo_jax_distributed.py
Lines changed: 6 additions & 1 deletion
diff --git a/examples/keras_io/structured_data/collaborative_filtering_movielens.py b/examples/keras_io/structured_data/collaborative_filtering_movielens.py
Lines changed: 8 additions & 2 deletions
diff --git a/examples/keras_io/tensorflow/vision/image_captioning.py b/examples/keras_io/tensorflow/vision/image_captioning.py
Lines changed: 47 additions & 16 deletions
diff --git a/examples/keras_io/tensorflow/vision/image_classification_with_vision_transformer.py b/examples/keras_io/tensorflow/vision/image_classification_with_vision_transformer.py
Lines changed: 11 additions & 4 deletions
diff --git a/examples/keras_io/tensorflow/vision/involution.py b/examples/keras_io/tensorflow/vision/involution.py
Lines changed: 48 additions & 10 deletions
@@ -1,6 +1,6 @@
 """Image classification benchmark.
 
-This scripts runs image classification benchmark with dogs vs cats datasets. It 
+This script runs image classification benchmark with "dogs vs cats" datasets. It 
 supports the following 3 models:
 - EfficientNetV2B0
 - Xception
 
@@ -2,7 +2,6 @@
 
 import keras_core
 from keras_core import Model
-from keras_core import backend
 from keras_core import initializers
 from keras_core import layers
 from keras_core import losses
 
@@ -98,6 +98,7 @@ def train(model, train_loader, num_epochs, optimizer, loss_fn):
 ######## Using a Keras model or layer in a torch Module ########
 ################################################################
 
+
 class MyModel(nn.Module):
     def __init__(self):
         super().__init__()
@@ -126,4 +127,4 @@ def forward(self, x):
 # Instantiate the torch loss function
 loss_fn = nn.CrossEntropyLoss()
 
-train(torch_module, train_loader, num_epochs, optimizer, loss_fn)
+train(torch_module, train_loader, num_epochs, optimizer, loss_fn)
@@ -157,7 +157,12 @@ def make_model():
 # data will be split along the batch axis
 data_mesh = Mesh(devices, axis_names=("batch",))  # naming axes of the mesh
 # naming axes of the sharded partition
-data_sharding = NamedSharding(data_mesh,P("batch",),)
+data_sharding = NamedSharding(
+    data_mesh,
+    P(
+        "batch",
+    ),
+)
 
 # all variables will be replicated on all devices
 var_mesh = Mesh(devices, axis_names=("_"))
 
@@ -40,6 +40,7 @@
 import keras_core as keras
 from keras_core import layers
 from keras_core import ops
+
 """
 ## First, load the data and apply preprocessing
 """
@@ -97,7 +98,11 @@
 df = df.sample(frac=1, random_state=42)
 x = df[["user", "movie"]].values
 # Normalize the targets between 0 and 1. Makes it easy to train.
-y = df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
+y = (
+    df["rating"]
+    .apply(lambda x: (x - min_rating) / (max_rating - min_rating))
+    .values
+)
 # Assuming training on 90% of the data and validating on 10%.
 train_indices = int(0.9 * df.shape[0])
 x_train, x_val, y_train, y_val = (
@@ -204,7 +209,8 @@ def call(self, inputs):
 ratings = model.predict(user_movie_array).flatten()
 top_ratings_indices = ratings.argsort()[-10:][::-1]
 recommended_movie_ids = [
-    movie_encoded2movie.get(movies_not_watched[x][0]) for x in top_ratings_indices
+    movie_encoded2movie.get(movies_not_watched[x][0])
+    for x in top_ratings_indices
 ]
 
 print("Showing recommendations for user: {}".format(user_id))
 
@@ -178,7 +178,9 @@ def train_val_split(caption_data, train_size=0.8, shuffle=True):
 
 def custom_standardization(input_string):
     lowercase = tf.strings.lower(input_string)
-    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")
+    return tf.strings.regex_replace(
+        lowercase, "[%s]" % re.escape(strip_chars), ""
+    )
 
 
 strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
@@ -263,7 +265,9 @@ def get_cnn_model():
     # We freeze our feature extractor
     base_model.trainable = False
     base_model_out = base_model.output
-    base_model_out = layers.Reshape((-1, base_model_out.shape[-1]))(base_model_out)
+    base_model_out = layers.Reshape((-1, base_model_out.shape[-1]))(
+        base_model_out
+    )
     cnn_model = keras.models.Model(base_model.input, base_model_out)
     return cnn_model
 
@@ -342,7 +346,9 @@ def __init__(self, embed_dim, ff_dim, num_heads, **kwargs):
         self.layernorm_3 = layers.LayerNormalization()
 
         self.embedding = PositionalEmbedding(
-            embed_dim=EMBED_DIM, sequence_length=SEQ_LENGTH, vocab_size=VOCAB_SIZE
+            embed_dim=EMBED_DIM,
+            sequence_length=SEQ_LENGTH,
+            vocab_size=VOCAB_SIZE,
         )
         self.out = layers.Dense(VOCAB_SIZE, activation="softmax")
 
@@ -394,7 +400,10 @@ def get_causal_attention_mask(self, inputs):
         mask = tf.cast(i >= j, dtype="int32")
         mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
         mult = tf.concat(
-            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
+            [
+                tf.expand_dims(batch_size, -1),
+                tf.constant([1, 1], dtype=tf.int32),
+            ],
             axis=0,
         )
         return tf.tile(mask, mult)
@@ -431,7 +440,9 @@ def calculate_accuracy(self, y_true, y_pred, mask):
         mask = tf.cast(mask, dtype=tf.float32)
         return tf.reduce_sum(accuracy) / tf.reduce_sum(mask)
 
-    def _compute_caption_loss_and_acc(self, img_embed, batch_seq, training=True):
+    def _compute_caption_loss_and_acc(
+        self, img_embed, batch_seq, training=True
+    ):
         encoder_out = self.encoder(img_embed, training=training)
         batch_seq_inp = batch_seq[:, :-1]
         batch_seq_true = batch_seq[:, 1:]
@@ -469,7 +480,8 @@ def train_step(self, batch_data):
 
             # 4. Get the list of all the trainable weights
             train_vars = (
-                self.encoder.trainable_variables + self.decoder.trainable_variables
+                self.encoder.trainable_variables
+                + self.decoder.trainable_variables
             )
 
             # 5. Get the gradients
@@ -484,7 +496,10 @@ def train_step(self, batch_data):
         self.acc_tracker.update_state(batch_acc)
 
         # 8. Return the loss and accuracy values
-        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}
+        return {
+            "loss": self.loss_tracker.result(),
+            "acc": self.acc_tracker.result(),
+        }
 
     def test_step(self, batch_data):
         batch_img, batch_seq = batch_data
@@ -513,7 +528,10 @@ def test_step(self, batch_data):
         self.acc_tracker.update_state(batch_acc)
 
         # 5. Return the loss and accuracy values
-        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}
+        return {
+            "loss": self.loss_tracker.result(),
+            "acc": self.acc_tracker.result(),
+        }
 
     @property
     def metrics(self):
@@ -523,8 +541,12 @@ def metrics(self):
 
 
 cnn_model = get_cnn_model()
-encoder = TransformerEncoderBlock(embed_dim=EMBED_DIM, dense_dim=FF_DIM, num_heads=1)
-decoder = TransformerDecoderBlock(embed_dim=EMBED_DIM, ff_dim=FF_DIM, num_heads=2)
+encoder = TransformerEncoderBlock(
+    embed_dim=EMBED_DIM, dense_dim=FF_DIM, num_heads=1
+)
+decoder = TransformerDecoderBlock(
+    embed_dim=EMBED_DIM, ff_dim=FF_DIM, num_heads=2
+)
 caption_model = ImageCaptioningModel(
     cnn_model=cnn_model,
     encoder=encoder,
@@ -539,15 +561,20 @@ def metrics(self):
 
 # Define the loss function
 cross_entropy = keras.losses.SparseCategoricalCrossentropy(
-    from_logits=False, reduction=None,
+    from_logits=False,
+    reduction=None,
 )
 
 # EarlyStopping criteria
-early_stopping = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
+early_stopping = keras.callbacks.EarlyStopping(
+    patience=3, restore_best_weights=True
+)
 
 
 # Learning Rate Scheduler for the optimizer
-class LRSchedule(keras.optimizers.schedules.learning_rate_schedule.LearningRateSchedule):
+class LRSchedule(
+    keras.optimizers.schedules.learning_rate_schedule.LearningRateSchedule
+):
     def __init__(self, post_warmup_learning_rate, warmup_steps):
         super().__init__()
         self.post_warmup_learning_rate = post_warmup_learning_rate
@@ -568,10 +595,14 @@ def __call__(self, step):
 # Create a learning rate schedule
 num_train_steps = len(train_dataset) * EPOCHS
 num_warmup_steps = num_train_steps // 15
-lr_schedule = LRSchedule(post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps)
+lr_schedule = LRSchedule(
+    post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps
+)
 
 # Compile the model
-caption_model.compile(optimizer=keras.optimizers.Adam(lr_schedule), loss=cross_entropy)
+caption_model.compile(
+    optimizer=keras.optimizers.Adam(lr_schedule), loss=cross_entropy
+)
 
 # Fit the model
 caption_model.fit(
@@ -639,4 +670,4 @@ def generate_caption():
 this example easily runnable, we have trained it with a few constraints, like a minimal
 number of attention heads. To improve the predictions, you can try changing these training
 settings and find a good model for your use case.
-"""
+"""
@@ -58,7 +58,10 @@
     projection_dim,
 ]  # Size of the transformer layers
 transformer_layers = 8
-mlp_head_units = [2048, 1024]  # Size of the dense layers of the final classifier
+mlp_head_units = [
+    2048,
+    1024,
+]  # Size of the dense layers of the final classifier
 
 
 """
@@ -218,7 +221,9 @@ def create_vit_classifier():
     representation = layers.Flatten()(representation)
     representation = layers.Dropout(0.5)(representation)
     # Add MLP.
-    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
+    features = mlp(
+        representation, hidden_units=mlp_head_units, dropout_rate=0.5
+    )
     # Classify outputs.
     logits = layers.Dense(num_classes)(features)
     # Create the Keras model.
@@ -241,7 +246,9 @@ def run_experiment(model):
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
         metrics=[
             keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
-            keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
+            keras.metrics.SparseTopKCategoricalAccuracy(
+                5, name="top-5-accuracy"
+            ),
         ],
     )
 
@@ -288,4 +295,4 @@ def run_experiment(model):
 but also by parameters such as the learning rate schedule, optimizer, weight decay, etc.
 In practice, it's recommended to fine-tune a ViT model
 that was pre-trained using a large, high-resolution dataset.
-"""
+"""
@@ -123,7 +123,9 @@ def build(self, input_shape):
                 keras.layers.BatchNormalization(),
                 keras.layers.ReLU(),
                 keras.layers.Conv2D(
-                    filters=self.kernel_size * self.kernel_size * self.group_number,
+                    filters=self.kernel_size
+                    * self.kernel_size
+                    * self.group_number,
                     kernel_size=1,
                 ),
             ]
@@ -198,22 +200,39 @@ def call(self, x):
 
 # Compute involution with stride 1.
 output_tensor, _ = Involution(
-    channel=3, group_number=1, kernel_size=5, stride=1, reduction_ratio=1, name="inv_1"
+    channel=3,
+    group_number=1,
+    kernel_size=5,
+    stride=1,
+    reduction_ratio=1,
+    name="inv_1",
 )(input_tensor)
 print(f"with stride 1 ouput shape: {output_tensor.shape}")
 
 # Compute involution with stride 2.
 output_tensor, _ = Involution(
-    channel=3, group_number=1, kernel_size=5, stride=2, reduction_ratio=1, name="inv_2"
+    channel=3,
+    group_number=1,
+    kernel_size=5,
+    stride=2,
+    reduction_ratio=1,
+    name="inv_2",
 )(input_tensor)
 print(f"with stride 2 ouput shape: {output_tensor.shape}")
 
 # Compute involution with stride 1, channel 16 and reduction ratio 2.
 output_tensor, _ = Involution(
-    channel=16, group_number=1, kernel_size=5, stride=1, reduction_ratio=2, name="inv_3"
+    channel=16,
+    group_number=1,
+    kernel_size=5,
+    stride=1,
+    reduction_ratio=2,
+    name="inv_3",
 )(input_tensor)
 print(
-    "with channel 16 and reduction ratio 2 ouput shape: {}".format(output_tensor.shape)
+    "with channel 16 and reduction ratio 2 ouput shape: {}".format(
+        output_tensor.shape
+    )
 )
 
 """
@@ -250,7 +269,9 @@ def call(self, x):
     .shuffle(256)
     .batch(256)
 )
-test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(256)
+test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(
+    256
+)
 
 """
 ## Visualise the data
@@ -287,7 +308,9 @@ def call(self, x):
 print("building the convolution model...")
 conv_model = keras.Sequential(
     [
-        keras.layers.Conv2D(32, (3, 3), input_shape=(32, 32, 3), padding="same"),
+        keras.layers.Conv2D(
+            32, (3, 3), input_shape=(32, 32, 3), padding="same"
+        ),
         keras.layers.ReLU(name="relu1"),
         keras.layers.MaxPooling2D((2, 2)),
         keras.layers.Conv2D(64, (3, 3), padding="same"),
@@ -323,17 +346,32 @@ def call(self, x):
 
 inputs = keras.Input(shape=(32, 32, 3))
 x, _ = Involution(
-    channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_1"
+    channel=3,
+    group_number=1,
+    kernel_size=3,
+    stride=1,
+    reduction_ratio=2,
+    name="inv_1",
 )(inputs)
 x = keras.layers.ReLU()(x)
 x = keras.layers.MaxPooling2D((2, 2))(x)
 x, _ = Involution(
-    channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_2"
+    channel=3,
+    group_number=1,
+    kernel_size=3,
+    stride=1,
+    reduction_ratio=2,
+    name="inv_2",
 )(x)
 x = keras.layers.ReLU()(x)
 x = keras.layers.MaxPooling2D((2, 2))(x)
 x, _ = Involution(
-    channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_3"
+    channel=3,
+    group_number=1,
+    kernel_size=3,
+    stride=1,
+    reduction_ratio=2,
+    name="inv_3",
 )(x)
 x = keras.layers.ReLU()(x)
 x = keras.layers.Flatten()(x)