Skip to content

Commit

Permalink
Merge branch 'main' into chen-run-torch-benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
chenmoneygithub committed Jun 30, 2023
2 parents e597ad4 + 7ee847d commit 15f2742
Show file tree
Hide file tree
Showing 24 changed files with 840 additions and 87 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Image classification benchmark.
This scripts runs image classification benchmark with dogs vs cats datasets. It
This script runs image classification benchmark with "dogs vs cats" datasets. It
supports the following 3 models:
- EfficientNetV2B0
- Xception
Expand Down
1 change: 0 additions & 1 deletion examples/demo_custom_layer_backend_agnostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import keras_core
from keras_core import Model
from keras_core import backend
from keras_core import initializers
from keras_core import layers
from keras_core import losses
Expand Down
3 changes: 2 additions & 1 deletion examples/demo_custom_torch_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def train(model, train_loader, num_epochs, optimizer, loss_fn):
######## Using a Keras model or layer in a torch Module ########
################################################################


class MyModel(nn.Module):
def __init__(self):
super().__init__()
Expand Down Expand Up @@ -126,4 +127,4 @@ def forward(self, x):
# Instantiate the torch loss function
loss_fn = nn.CrossEntropyLoss()

train(torch_module, train_loader, num_epochs, optimizer, loss_fn)
train(torch_module, train_loader, num_epochs, optimizer, loss_fn)
7 changes: 6 additions & 1 deletion examples/demo_jax_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,12 @@ def make_model():
# data will be split along the batch axis
data_mesh = Mesh(devices, axis_names=("batch",)) # naming axes of the mesh
# naming axes of the sharded partition
data_sharding = NamedSharding(data_mesh,P("batch",),)
data_sharding = NamedSharding(
data_mesh,
P(
"batch",
),
)

# all variables will be replicated on all devices
var_mesh = Mesh(devices, axis_names=("_"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import keras_core as keras
from keras_core import layers
from keras_core import ops

"""
## First, load the data and apply preprocessing
"""
Expand Down Expand Up @@ -97,7 +98,11 @@
df = df.sample(frac=1, random_state=42)
x = df[["user", "movie"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
y = (
df["rating"]
.apply(lambda x: (x - min_rating) / (max_rating - min_rating))
.values
)
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * df.shape[0])
x_train, x_val, y_train, y_val = (
Expand Down Expand Up @@ -204,7 +209,8 @@ def call(self, inputs):
ratings = model.predict(user_movie_array).flatten()
top_ratings_indices = ratings.argsort()[-10:][::-1]
recommended_movie_ids = [
movie_encoded2movie.get(movies_not_watched[x][0]) for x in top_ratings_indices
movie_encoded2movie.get(movies_not_watched[x][0])
for x in top_ratings_indices
]

print("Showing recommendations for user: {}".format(user_id))
Expand Down
63 changes: 47 additions & 16 deletions examples/keras_io/tensorflow/vision/image_captioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ def train_val_split(caption_data, train_size=0.8, shuffle=True):

def custom_standardization(input_string):
lowercase = tf.strings.lower(input_string)
return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")
return tf.strings.regex_replace(
lowercase, "[%s]" % re.escape(strip_chars), ""
)


strip_chars = "!\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
Expand Down Expand Up @@ -263,7 +265,9 @@ def get_cnn_model():
# We freeze our feature extractor
base_model.trainable = False
base_model_out = base_model.output
base_model_out = layers.Reshape((-1, base_model_out.shape[-1]))(base_model_out)
base_model_out = layers.Reshape((-1, base_model_out.shape[-1]))(
base_model_out
)
cnn_model = keras.models.Model(base_model.input, base_model_out)
return cnn_model

Expand Down Expand Up @@ -342,7 +346,9 @@ def __init__(self, embed_dim, ff_dim, num_heads, **kwargs):
self.layernorm_3 = layers.LayerNormalization()

self.embedding = PositionalEmbedding(
embed_dim=EMBED_DIM, sequence_length=SEQ_LENGTH, vocab_size=VOCAB_SIZE
embed_dim=EMBED_DIM,
sequence_length=SEQ_LENGTH,
vocab_size=VOCAB_SIZE,
)
self.out = layers.Dense(VOCAB_SIZE, activation="softmax")

Expand Down Expand Up @@ -394,7 +400,10 @@ def get_causal_attention_mask(self, inputs):
mask = tf.cast(i >= j, dtype="int32")
mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
mult = tf.concat(
[tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
[
tf.expand_dims(batch_size, -1),
tf.constant([1, 1], dtype=tf.int32),
],
axis=0,
)
return tf.tile(mask, mult)
Expand Down Expand Up @@ -431,7 +440,9 @@ def calculate_accuracy(self, y_true, y_pred, mask):
mask = tf.cast(mask, dtype=tf.float32)
return tf.reduce_sum(accuracy) / tf.reduce_sum(mask)

def _compute_caption_loss_and_acc(self, img_embed, batch_seq, training=True):
def _compute_caption_loss_and_acc(
self, img_embed, batch_seq, training=True
):
encoder_out = self.encoder(img_embed, training=training)
batch_seq_inp = batch_seq[:, :-1]
batch_seq_true = batch_seq[:, 1:]
Expand Down Expand Up @@ -469,7 +480,8 @@ def train_step(self, batch_data):

# 4. Get the list of all the trainable weights
train_vars = (
self.encoder.trainable_variables + self.decoder.trainable_variables
self.encoder.trainable_variables
+ self.decoder.trainable_variables
)

# 5. Get the gradients
Expand All @@ -484,7 +496,10 @@ def train_step(self, batch_data):
self.acc_tracker.update_state(batch_acc)

# 8. Return the loss and accuracy values
return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}
return {
"loss": self.loss_tracker.result(),
"acc": self.acc_tracker.result(),
}

def test_step(self, batch_data):
batch_img, batch_seq = batch_data
Expand Down Expand Up @@ -513,7 +528,10 @@ def test_step(self, batch_data):
self.acc_tracker.update_state(batch_acc)

# 5. Return the loss and accuracy values
return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}
return {
"loss": self.loss_tracker.result(),
"acc": self.acc_tracker.result(),
}

@property
def metrics(self):
Expand All @@ -523,8 +541,12 @@ def metrics(self):


cnn_model = get_cnn_model()
encoder = TransformerEncoderBlock(embed_dim=EMBED_DIM, dense_dim=FF_DIM, num_heads=1)
decoder = TransformerDecoderBlock(embed_dim=EMBED_DIM, ff_dim=FF_DIM, num_heads=2)
encoder = TransformerEncoderBlock(
embed_dim=EMBED_DIM, dense_dim=FF_DIM, num_heads=1
)
decoder = TransformerDecoderBlock(
embed_dim=EMBED_DIM, ff_dim=FF_DIM, num_heads=2
)
caption_model = ImageCaptioningModel(
cnn_model=cnn_model,
encoder=encoder,
Expand All @@ -539,15 +561,20 @@ def metrics(self):

# Define the loss function
cross_entropy = keras.losses.SparseCategoricalCrossentropy(
from_logits=False, reduction=None,
from_logits=False,
reduction=None,
)

# EarlyStopping criteria
early_stopping = keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
early_stopping = keras.callbacks.EarlyStopping(
patience=3, restore_best_weights=True
)


# Learning Rate Scheduler for the optimizer
class LRSchedule(keras.optimizers.schedules.learning_rate_schedule.LearningRateSchedule):
class LRSchedule(
keras.optimizers.schedules.learning_rate_schedule.LearningRateSchedule
):
def __init__(self, post_warmup_learning_rate, warmup_steps):
super().__init__()
self.post_warmup_learning_rate = post_warmup_learning_rate
Expand All @@ -568,10 +595,14 @@ def __call__(self, step):
# Create a learning rate schedule
num_train_steps = len(train_dataset) * EPOCHS
num_warmup_steps = num_train_steps // 15
lr_schedule = LRSchedule(post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps)
lr_schedule = LRSchedule(
post_warmup_learning_rate=1e-4, warmup_steps=num_warmup_steps
)

# Compile the model
caption_model.compile(optimizer=keras.optimizers.Adam(lr_schedule), loss=cross_entropy)
caption_model.compile(
optimizer=keras.optimizers.Adam(lr_schedule), loss=cross_entropy
)

# Fit the model
caption_model.fit(
Expand Down Expand Up @@ -639,4 +670,4 @@ def generate_caption():
this example easily runnable, we have trained it with a few constraints, like a minimal
number of attention heads. To improve the predictions, you can try changing these training
settings and find a good model for your use case.
"""
"""
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@
projection_dim,
] # Size of the transformer layers
transformer_layers = 8
mlp_head_units = [2048, 1024] # Size of the dense layers of the final classifier
mlp_head_units = [
2048,
1024,
] # Size of the dense layers of the final classifier


"""
Expand Down Expand Up @@ -218,7 +221,9 @@ def create_vit_classifier():
representation = layers.Flatten()(representation)
representation = layers.Dropout(0.5)(representation)
# Add MLP.
features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
features = mlp(
representation, hidden_units=mlp_head_units, dropout_rate=0.5
)
# Classify outputs.
logits = layers.Dense(num_classes)(features)
# Create the Keras model.
Expand All @@ -241,7 +246,9 @@ def run_experiment(model):
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[
keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
keras.metrics.SparseTopKCategoricalAccuracy(
5, name="top-5-accuracy"
),
],
)

Expand Down Expand Up @@ -288,4 +295,4 @@ def run_experiment(model):
but also by parameters such as the learning rate schedule, optimizer, weight decay, etc.
In practice, it's recommended to fine-tune a ViT model
that was pre-trained using a large, high-resolution dataset.
"""
"""
58 changes: 48 additions & 10 deletions examples/keras_io/tensorflow/vision/involution.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ def build(self, input_shape):
keras.layers.BatchNormalization(),
keras.layers.ReLU(),
keras.layers.Conv2D(
filters=self.kernel_size * self.kernel_size * self.group_number,
filters=self.kernel_size
* self.kernel_size
* self.group_number,
kernel_size=1,
),
]
Expand Down Expand Up @@ -198,22 +200,39 @@ def call(self, x):

# Compute involution with stride 1.
output_tensor, _ = Involution(
channel=3, group_number=1, kernel_size=5, stride=1, reduction_ratio=1, name="inv_1"
channel=3,
group_number=1,
kernel_size=5,
stride=1,
reduction_ratio=1,
name="inv_1",
)(input_tensor)
print(f"with stride 1 ouput shape: {output_tensor.shape}")

# Compute involution with stride 2.
output_tensor, _ = Involution(
channel=3, group_number=1, kernel_size=5, stride=2, reduction_ratio=1, name="inv_2"
channel=3,
group_number=1,
kernel_size=5,
stride=2,
reduction_ratio=1,
name="inv_2",
)(input_tensor)
print(f"with stride 2 ouput shape: {output_tensor.shape}")

# Compute involution with stride 1, channel 16 and reduction ratio 2.
output_tensor, _ = Involution(
channel=16, group_number=1, kernel_size=5, stride=1, reduction_ratio=2, name="inv_3"
channel=16,
group_number=1,
kernel_size=5,
stride=1,
reduction_ratio=2,
name="inv_3",
)(input_tensor)
print(
"with channel 16 and reduction ratio 2 ouput shape: {}".format(output_tensor.shape)
"with channel 16 and reduction ratio 2 ouput shape: {}".format(
output_tensor.shape
)
)

"""
Expand Down Expand Up @@ -250,7 +269,9 @@ def call(self, x):
.shuffle(256)
.batch(256)
)
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(256)
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(
256
)

"""
## Visualise the data
Expand Down Expand Up @@ -287,7 +308,9 @@ def call(self, x):
print("building the convolution model...")
conv_model = keras.Sequential(
[
keras.layers.Conv2D(32, (3, 3), input_shape=(32, 32, 3), padding="same"),
keras.layers.Conv2D(
32, (3, 3), input_shape=(32, 32, 3), padding="same"
),
keras.layers.ReLU(name="relu1"),
keras.layers.MaxPooling2D((2, 2)),
keras.layers.Conv2D(64, (3, 3), padding="same"),
Expand Down Expand Up @@ -323,17 +346,32 @@ def call(self, x):

inputs = keras.Input(shape=(32, 32, 3))
x, _ = Involution(
channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_1"
channel=3,
group_number=1,
kernel_size=3,
stride=1,
reduction_ratio=2,
name="inv_1",
)(inputs)
x = keras.layers.ReLU()(x)
x = keras.layers.MaxPooling2D((2, 2))(x)
x, _ = Involution(
channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_2"
channel=3,
group_number=1,
kernel_size=3,
stride=1,
reduction_ratio=2,
name="inv_2",
)(x)
x = keras.layers.ReLU()(x)
x = keras.layers.MaxPooling2D((2, 2))(x)
x, _ = Involution(
channel=3, group_number=1, kernel_size=3, stride=1, reduction_ratio=2, name="inv_3"
channel=3,
group_number=1,
kernel_size=3,
stride=1,
reduction_ratio=2,
name="inv_3",
)(x)
x = keras.layers.ReLU()(x)
x = keras.layers.Flatten()(x)
Expand Down
Loading

0 comments on commit 15f2742

Please sign in to comment.