Skip to content

Commit 91b13a4

Browse files
committed
update: namespaces
1 parent 8aecc84 commit 91b13a4

11 files changed

+41
-69
lines changed

.github/workflows/format.yml

-28
This file was deleted.

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ This is an unofficial `Keras` reimplementation of [VideoMAE: Masked Autoencoders
2626
# Install
2727

2828
```bash
29-
pip install -U git+https://github.com/innat/VideoMAE.git
29+
git clone https://github.com/innat/VideoMAE.git
30+
cd VideoMAE
31+
pip install -e .
3032
```
3133

3234
# Usage

requirements.txt

-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
tensorflow>=2.12
22
opencv-python>=4.1.2
3-
isort
4-
flake8

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
long_description_content_type="text/markdown",
1111
author="Mohammed Innat",
1212
author_email="[email protected]",
13-
url="https://github.com/innat/DOLG-TensorFlow",
13+
url="https://github.com/innat/VideoMAE",
1414
keywords=["deep learning", "image retrieval", "image recognition"],
1515
install_requires=[
1616
"opencv-python>=4.1.2",
@@ -25,6 +25,6 @@
2525
"Intended Audience :: Developers",
2626
"Topic :: Scientific/Engineering :: Artificial Intelligence",
2727
"License :: OSI Approved :: MIT License",
28-
"Programming Language :: Python :: 3.7",
28+
"Programming Language :: Python :: 3.8",
2929
],
3030
)

videomae/blocks/basic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from tensorflow.keras import layers
44
from tensorflow.keras.initializers import Constant
55

6-
from videomae.layers import TFAttention, TFDropPath, TFMlp
6+
from layers import TFAttention, TFDropPath, TFMlp
77

88

99
class TFBlock(keras.Model):

videomae/blocks/vit_decoder.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from tensorflow import keras
55
from tensorflow.keras import layers
66

7-
from videomae.blocks import TFBlock
8-
from videomae.layers import TFAttention
7+
from blocks import TFBlock
8+
from layers import TFAttention
99

1010

1111
class TFPretrainVisionTransformerDecoder(keras.Model):

videomae/blocks/vit_encoder.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from tensorflow.keras import layers
66
from tensorflow.keras.initializers import TruncatedNormal
77

8-
from videomae.blocks import TFBlock
9-
from videomae.layers import TFAttention, TFPatchEmbed
10-
from videomae.utils import get_sinusoid_encoding_table_tf
8+
from blocks import TFBlock
9+
from layers import TFAttention, TFPatchEmbed
10+
from utils import get_sinusoid_encoding_table_tf
1111

1212

1313
class TFPretrainVisionTransformerEncoder(keras.Model):

videomae/layers/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from .attention import TFAttention
22
from .drop_path import TFDropPath
3-
from .masking_generator import TubeMaskingGenerator
43
from .mlp import TFMlp
54
from .patch_embed import TFPatchEmbed

videomae/utils/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
from .sinusoid_encoding_table import get_sinusoid_encoding_table_tf
2+
from .masking_generator import TubeMaskingGenerator
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
1-
import numpy as np
2-
3-
4-
class TubeMaskingGenerator:
5-
def __init__(self, input_size, mask_ratio):
6-
self.frames, self.height, self.width = input_size
7-
self.num_patches_per_frame = self.height * self.width
8-
self.total_patches = self.frames * self.num_patches_per_frame
9-
self.num_masks_per_frame = int(mask_ratio * self.num_patches_per_frame)
10-
self.total_masks = self.frames * self.num_masks_per_frame
11-
12-
def __repr__(self):
13-
repr_str = "Maks: total patches {}, mask patches {}".format(
14-
self.total_patches, self.total_masks
15-
)
16-
return repr_str
17-
18-
def __call__(self):
19-
mask_per_frame = np.hstack(
20-
[
21-
np.zeros(self.num_patches_per_frame - self.num_masks_per_frame),
22-
np.ones(self.num_masks_per_frame),
23-
]
24-
)
25-
np.random.shuffle(mask_per_frame)
26-
mask = np.tile(mask_per_frame, (self.frames, 1)).flatten()
27-
return mask
class TubeMaskingGenerator:
    """Generate a per-video "tube" mask for VideoMAE pretraining.

    The same random spatial mask is repeated across every frame (a
    "tube" through time): a fraction ``mask_ratio`` of the patches in
    one frame is masked, and that pattern is tiled over all frames.

    Parameters
    ----------
    input_size : tuple[int, int, int]
        ``(frames, height, width)`` in units of patches (not pixels).
    mask_ratio : float
        Fraction of patches per frame to mask, in ``[0, 1]``.
    """

    def __init__(self, input_size, mask_ratio):
        self.frames, self.height, self.width = input_size
        self.num_patches_per_frame = self.height * self.width
        self.total_patches = self.frames * self.num_patches_per_frame
        # int() truncates, so the effective ratio may be slightly below
        # mask_ratio when it does not divide the patch count evenly.
        self.num_masks_per_frame = int(mask_ratio * self.num_patches_per_frame)
        self.total_masks = self.frames * self.num_masks_per_frame

    def __repr__(self):
        """Return a short summary of total vs. masked patch counts."""
        # Fixed typo: "Maks" -> "Mask".
        return "Mask: total patches {}, mask patches {}".format(
            self.total_patches, self.total_masks
        )

    def __call__(self):
        """Return a flat 0/1 mask of length ``total_patches``.

        ``1`` marks a masked patch. The single-frame pattern is shuffled
        once, then tiled over all frames so every frame shares the same
        spatial mask (the "tube").
        """
        num_visible = self.num_patches_per_frame - self.num_masks_per_frame
        mask_per_frame = np.hstack(
            [
                np.zeros(num_visible),
                np.ones(self.num_masks_per_frame),
            ]
        )
        # In-place shuffle chooses which spatial positions are masked.
        np.random.shuffle(mask_per_frame)
        mask = np.tile(mask_per_frame, (self.frames, 1)).flatten()
        return mask

videomae/videomae_pretrain.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from tensorflow.keras import layers
1111
from tensorflow.keras.initializers import RandomNormal
1212

13-
from videomae.blocks import (TFPretrainVisionTransformerDecoder,
14-
TFPretrainVisionTransformerEncoder)
13+
from videomae.blocks import TFPretrainVisionTransformerDecoder
14+
from videomae.blocks import TFPretrainVisionTransformerEncoder
1515
from videomae.utils import get_sinusoid_encoding_table_tf
1616

1717
from .model_configs import MODEL_CONFIGS

0 commit comments

Comments
 (0)