Potential tf.keras fix #26

sinclairnick · 2020-04-30T00:17:04Z

Posting this for future people who may run into the issues I did.

I couldn't get the original implementation to work properly for me, using tf.keras. Hence, I made the necessary modifications to produce the correct output shapes. Currently the output shape is defined as

(None, 1, total_n_pyramid_sections, n_channels)

The code I used to achieve this is:

# from tensorflow.keras.layers import Layer
# pylint: disable=import-error
import tensorflow as tf
from tensorflow.keras.layers import Layer, Reshape, concatenate, Permute
from tensorflow.keras import backend as K

# taken from https://github.com/yhenon/keras-spp


class SpatialPyramidPooling(Layer):
    """Spatial pyramid pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the number of pooling regions,
            each int in the list is the number of regions in that pool. For example [1,2,4] would be 3
            regions with 1, 2x2 and 4x4 max pools, so 21 outputs per feature map
    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.
    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list])`
    """

    def __init__(self, pool_list, **kwargs):


        self.pool_list = pool_list

        self.num_outputs_per_channel = sum([i * i for i in pool_list])

        super(SpatialPyramidPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[3]

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.nb_channels * self.num_outputs_per_channel)

    def get_config(self):
        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):

        input_shape = K.shape(x)
        num_rows = input_shape[1]
        num_cols = input_shape[2]

        row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []

        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    new_shape = [input_shape[0], y2 - y1,
                                    x2 - x1, input_shape[3]]

                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = Reshape(new_shape)(x_crop)
                    pooled_val = K.max(xm, axis=(1, 2))
                    outputs.append(pooled_val)


        outputs = concatenate(outputs, axis=2)
       # ---change this line to alter output shape---
        outputs = Reshape((input_shape[0], 1, self.num_outputs_per_channel, self.nb_channels))(outputs)
        outputs = K.squeeze(outputs, axis=1) # makes output only rank 4, instead of 5
        # outputs = concatenate(outputs)
        # outputs = Reshape((len(self.pool_list),self.num_outputs_per_channel,input_shape[0],input_shape[1]))(outputs)
        # outputs = Permute(outputs,(3,1,0,2))

        return outputs

The text was updated successfully, but these errors were encountered:

wangyexiang · 2020-08-06T07:24:26Z

Dear sir, How to fix the below issue?@sinclairnick
tensorflow version: 2.3

Encountered error:
    """
    using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.

here is my code:

import os
import numpy as np
import tensorflow
from tensorflow.keras import optimizers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Dense, Dropout

# Minimizes Tensorflow Logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

BATCH_SIZE = 64
NUM_CHANNELS = 1
NUM_CLASSES = 10

def makeModel():
	model = Sequential()

	# MODEL 1
	# uses tensorflow ordering. Note that we leave the image size as None to allow multiple image sizes
	model.add(Conv2D(32, 3, 3, padding='same', input_shape=(None, None, NUM_CHANNELS)))
	model.add(Activation('relu'))
	model.add(Conv2D(32, 3, 3, padding='same'))
	model.add(Activation('relu'))
	model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
	model.add(Conv2D(64, 3, 3, padding='same'))
	model.add(Activation('relu'))
	model.add(Conv2D(64, 3, 3, padding='same'))
	model.add(Activation('relu'))
	model.add(SpatialPyramidPooling([1, 2, 4]))
	model.add(Dense(NUM_CLASSES))
	model.add(Activation('softmax'))

	return model

def main():
	model=makeModel()
	model.summary()

	(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
	train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
	train_images = (train_images - 127.5) / 127.5		# Normalize the images to [-1, 1]

	test_images = test_images.reshape(test_images.shape[0], 28, 28, 1).astype('float32')
	test_images = (test_images - 127.5) / 127.5			# Normalize the images to [-1, 1]

	adam=optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
	model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics = ["accuracy"])
	model.fit(train_images, train_labels, epochs=10)

if __name__ == '__main__':
	main()

ffalam · 2021-06-17T21:41:18Z

@sinclairnick the code is not working, still getting
Encountered error:
"""
using a tf.Tensor as a Python bool is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.

Any comments how to solve it ? Would you please share what is your main function and model ?

zharfanzahisham · 2022-06-24T18:55:06Z

Not sure if I got it right but I think I might've fixed it. Here's the modified code

from keras.engine.topology import Layer
import keras.backend as K


class SpatialPyramidPooling(Layer):
    """Spatial pyramid pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the number of pooling regions,
            each int in the list is the number of regions in that pool. For example [1,2,4] would be 3
            regions with 1, 2x2 and 4x4 max pools, so 21 outputs per feature map
    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.
    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list])`
    """

    def __init__(self, pool_list, **kwargs):
        self.dim_ordering = K.image_data_format()
        assert self.dim_ordering in {'channels_last', 'channels_first'}, 'dim_ordering must be in {tf, th}'

        self.pool_list = pool_list

        self.num_outputs_per_channel = sum([i * i for i in pool_list])

        super(SpatialPyramidPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.dim_ordering == 'channels_first':
            self.nb_channels = input_shape[1]
        elif self.dim_ordering == 'channels_last':
            self.nb_channels = input_shape[3]

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.nb_channels * self.num_outputs_per_channel)

    def get_config(self):
        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):

        input_shape = K.shape(x)

        if self.dim_ordering == 'channels_first':
            num_rows = input_shape[2]
            num_cols = input_shape[3]
        elif self.dim_ordering == 'channels_last':
            num_rows = input_shape[1]
            num_cols = input_shape[2]

        row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []

        if self.dim_ordering == 'channels_first':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')
                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]
                        x_crop = x[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

        elif self.dim_ordering == 'channels_last':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], y2 - y1,
                                     x2 - x1, input_shape[3]]

                        x_crop = x[:, y1:y2, x1:x2, :]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(1, 2))
                        outputs.append(pooled_val)

        if self.dim_ordering == 'channels_first':
            outputs = K.concatenate(outputs)
        elif self.dim_ordering == 'channels_last':
            #outputs = K.concatenate(outputs,axis = 1)
            outputs = K.concatenate(outputs)
            # outputs = K.reshape(outputs,(len(self.pool_list),self.num_outputs_per_channel,input_shape[0],input_shape[1]))
            outputs = K.reshape(outputs, self.compute_output_shape(input_shape))
            #outputs = K.permute_dimensions(outputs,(3,1,0,2))
            #outputs = K.reshape(outputs,(input_shape[0], self.num_outputs_per_channel * self.nb_channels))

        return outputs

danibcorr · 2024-02-21T14:52:05Z

I tried the following version, with the tensor channel dimensions in the last position (tam_batch, height, width, channel) and it worked BUT before applying the output layer (Dense + Softmax) I applied Layer Normalization with GeLU activation because it was giving me NaN in the loss function. The code used is:

import tensorflow as tf
from tensorflow.keras import layers

# taken from https://github.com/yhenon/keras-spp


class SpatialPyramidPooling(layers.Layer):

    """Spatial pyramid pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the number of pooling regions,
            each int in the list is the number of regions in that pool. For example [1,2,4] would be 3
            regions with 1, 2x2 and 4x4 max pools, so 21 outputs per feature map
    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.
    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list])`
    """

    def __init__(self, pool_list, **kwargs):

        super(SpatialPyramidPooling, self).__init__(**kwargs)

        self.pool_list = pool_list
        self.num_outputs_per_channel = sum([i * i for i in pool_list])

        
    def build(self, input_shape):

        self.nb_channels = input_shape[3]


    def get_config(self):

        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()

        return dict(list(base_config.items()) + list(config.items()))


    def call(self, x, mask=None):

        input_shape = tf.shape(x)
        num_rows = input_shape[1]
        num_cols = input_shape[2]

        row_length = [tf.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [tf.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []

        for pool_num, num_pool_regions in enumerate(self.pool_list):

            for jy in range(num_pool_regions):

                for ix in range(num_pool_regions):

                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = tf.cast(tf.round(x1), dtype = tf.int32)
                    x2 = tf.cast(tf.round(x2), dtype = tf.int32)
                    y1 = tf.cast(tf.round(y1), dtype = tf.int32)
                    y2 = tf.cast(tf.round(y2), dtype = tf.int32)

                    new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]

                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = tf.reshape(tensor = x_crop, shape = new_shape)
                    pooled_val = tf.reduce_max(xm, axis=(1, 2))
                    outputs.append(pooled_val)

        outputs = tf.concat(outputs, axis = 1)

        return outputs

sinclairnick mentioned this issue Apr 30, 2020

Not working with tf.keras #25

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Potential tf.keras fix #26

Potential tf.keras fix #26

sinclairnick commented Apr 30, 2020 •

edited

Loading

wangyexiang commented Aug 6, 2020 •

edited

Loading

ffalam commented Jun 17, 2021

zharfanzahisham commented Jun 24, 2022 •

edited

Loading

danibcorr commented Feb 21, 2024

Potential tf.keras fix #26

Potential tf.keras fix #26

Comments

sinclairnick commented Apr 30, 2020 • edited Loading

wangyexiang commented Aug 6, 2020 • edited Loading

ffalam commented Jun 17, 2021

zharfanzahisham commented Jun 24, 2022 • edited Loading

danibcorr commented Feb 21, 2024

sinclairnick commented Apr 30, 2020 •

edited

Loading

wangyexiang commented Aug 6, 2020 •

edited

Loading

zharfanzahisham commented Jun 24, 2022 •

edited

Loading