
Commit

Refactor code for speed and clarity

Co-authored-by: UltralyticsAssistant <[email protected]>
glenn-jocher and UltralyticsAssistant authored Aug 26, 2024
1 parent 06525f9 commit 2423cdd
Showing 20 changed files with 175 additions and 83 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/merge-main-into-prs.yml
@@ -34,11 +34,11 @@ jobs:
g = Github(os.getenv('GITHUB_TOKEN'))
repo = g.get_repo(os.getenv('GITHUB_REPOSITORY'))
# Fetch the default branch name
default_branch_name = repo.default_branch
default_branch = repo.get_branch(default_branch_name)
for pr in repo.get_pulls(state='open', sort='created'):
try:
# Get full names for repositories and branches
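
The hunk above fetches the repository's default branch via repo.default_branch before iterating open pull requests. A minimal standalone sketch of the same PyGithub calls (it assumes the GITHUB_TOKEN and GITHUB_REPOSITORY environment variables and the PyGithub package are available; it is not the workflow's full merge logic):

```python
# Minimal sketch of the PyGithub calls shown above; not the workflow's full merge logic.
import os

from github import Github  # pip install PyGithub

g = Github(os.getenv("GITHUB_TOKEN"))
repo = g.get_repo(os.getenv("GITHUB_REPOSITORY"))  # e.g. "ultralytics/yolov3"

# Resolve the default branch at runtime instead of assuming a fixed name
default_branch_name = repo.default_branch
default_branch = repo.get_branch(default_branch_name)
print(f"Default branch: {default_branch_name} @ {default_branch.commit.sha[:7]}")

# Walk open PRs oldest-first, as the workflow does
for pr in repo.get_pulls(state="open", sort="created"):
    print(f"PR #{pr.number}: {pr.head.ref} -> {pr.base.ref}")
```
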
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@
<a href="https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml"><img src="https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg" alt="YOLOv3 CI"></a>
<a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="YOLOv3 Citation"></a>
<a href="https://hub.docker.com/r/ultralytics/yolov3"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov3?logo=docker" alt="Docker Pulls"></a>
<a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a>
<a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a> <a href="https://reddit.com/r/ultralytics"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
<br>
<a href="https://bit.ly/yolov5-paperspace-notebook"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
2 changes: 2 additions & 0 deletions export.py
@@ -91,6 +91,8 @@


class iOSModel(torch.nn.Module):
"""Exports a PyTorch model to an iOS-compatible format with normalized input dimensions and class configurations."""

def __init__(self, model, im):
"""
Initializes an iOSModel with normalized input dimensions and number of classes from a PyTorch model.
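
The new docstring above describes iOSModel as pairing a PyTorch model with normalized input dimensions and class information for iOS export. As a rough illustration of that idea only (the wrapper name, output layout, and scaling below are assumptions, not the actual export.py implementation), such a wrapper might rescale pixel-space boxes into [0, 1] coordinates:

```python
# Illustrative wrapper in the spirit of the iOSModel docstring: rescale pixel-space
# xywh outputs into [0, 1]. The class name and assumed output layout are hypothetical.
import torch
import torch.nn as nn


class NormalizedDetectWrapper(nn.Module):  # hypothetical name
    def __init__(self, model, im):
        """Store the wrapped model and a per-coordinate normalization factor from the input shape."""
        super().__init__()
        _, _, h, w = im.shape  # expected input, e.g. (1, 3, 640, 640)
        self.model = model
        self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h])

    def forward(self, x):
        """Run inference and return predictions with xywh scaled to normalized coordinates."""
        pred = self.model(x)[0]                    # assumed layout: (batch, n, 4 + 1 + nc)
        xywh, rest = pred[..., :4], pred[..., 4:]  # boxes in pixels, then confidence/classes
        return torch.cat((xywh * self.normalize, rest), dim=-1)
```
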
78 changes: 52 additions & 26 deletions models/common.py
@@ -55,7 +55,8 @@ def autopad(k, p=None, d=1): # kernel, padding, dilation


class Conv(nn.Module):
# Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
"""A standard Conv2D layer with batch normalization and optional activation for neural networks."""

default_act = nn.SiLU() # default activation

def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
@@ -81,7 +82,8 @@ def forward_fuse(self, x):


class DWConv(Conv):
# Depth-wise convolution
"""Implements depth-wise convolution for efficient spatial feature extraction in neural networks."""

def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
"""Initializes depth-wise convolution with optional activation; parameters are channel in/out, kernel, stride,
dilation.
@@ -90,7 +92,8 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, s


class DWConvTranspose2d(nn.ConvTranspose2d):
# Depth-wise transpose convolution
"""Implements a depth-wise transpose convolution layer with specified channels, kernel size, stride, and padding."""

def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
"""Initializes a depth-wise or transpose convolution layer with specified in/out channels, kernel size, stride,
and padding.
@@ -99,7 +102,8 @@ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stri


class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
"""Transformer layer with multi-head attention and feed-forward network, optimized by removing LayerNorm."""

def __init__(self, c, num_heads):
"""Initializes a Transformer layer as per https://arxiv.org/abs/2010.11929, sans LayerNorm, with specified
embedding dimension and number of heads.
@@ -122,7 +126,8 @@ def forward(self, x):


class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
"""Implements a Vision Transformer block with transformer layers; https://arxiv.org/abs/2010.11929."""

def __init__(self, c1, c2, num_heads, num_layers):
"""Initializes a Transformer block with optional convolution, linear, and transformer layers."""
super().__init__()
@@ -143,7 +148,8 @@ def forward(self, x):


class Bottleneck(nn.Module):
# Standard bottleneck
"""Implements a bottleneck layer with optional shortcut for efficient feature extraction in neural networks."""

def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
"""Initializes a standard bottleneck layer with optional shortcut; args: input channels (c1), output channels
(c2), shortcut (bool), groups (g), expansion factor (e).
@@ -162,7 +168,8 @@ def forward(self, x):


class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
"""Implements a CSP Bottleneck layer for feature extraction."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
"""Initializes CSP Bottleneck with channel in/out, optional shortcut, groups, expansion; see
https://github.com/WongKinYiu/CrossStagePartialNetworks.
@@ -187,7 +194,8 @@ def forward(self, x):


class CrossConv(nn.Module):
# Cross Convolution Downsample
"""Implements Cross Convolution Downsample with 1D and 2D convolutions and optional shortcut."""

def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
"""Initializes CrossConv with downsample options, combining 1D and 2D convolutions, optional shortcut if
input/output channels match.
@@ -204,7 +212,8 @@ def forward(self, x):


class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
"""Implements a CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion factor."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
"""Initializes CSP Bottleneck with 3 convolutions, optional shortcuts, group convolutions, and expansion
factor.
@@ -222,7 +231,8 @@ def forward(self, x):


class C3x(C3):
# C3 module with cross-convolutions
"""Extends the C3 module with cross-convolutions for enhanced feature extraction and flexibility."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes a C3x module with cross-convolutions, extending the C3 module with customizable parameters."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -231,7 +241,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):


class C3TR(C3):
# C3 module with TransformerBlock()
"""C3 module with TransformerBlock for integrating attention mechanisms in CNNs."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes a C3 module with TransformerBlock, extending C3 for attention mechanisms."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -240,7 +251,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):


class C3SPP(C3):
# C3 module with SPP()
"""Extends C3 with Spatial Pyramid Pooling (SPP) for enhanced feature extraction in CNNs."""

def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3SPP module, extending C3 with Spatial Pyramid Pooling for enhanced feature extraction."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -249,7 +261,8 @@ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):


class C3Ghost(C3):
# C3 module with GhostBottleneck()
"""Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in neural networks."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3Ghost module with Ghost Bottlenecks for efficient feature extraction."""
super().__init__(c1, c2, n, shortcut, g, e)
@@ -258,7 +271,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):


class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
"""Implements Spatial Pyramid Pooling (SPP) for enhanced feature extraction; see https://arxiv.org/abs/1406.4729."""

def __init__(self, c1, c2, k=(5, 9, 13)):
"""
Initializes SPP layer with specified channels and kernels.
@@ -284,7 +298,8 @@ def forward(self, x):


class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv3 by Glenn Jocher
"""Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv3 models."""

def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
"""Initializes the SPPF layer with specified input/output channels and kernel size for YOLOv3."""
super().__init__()
@@ -306,7 +321,8 @@ def forward(self, x):


class Focus(nn.Module):
# Focus wh information into c-space
"""Focuses spatial information into channel space using configurable convolution."""

def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
"""Initializes Focus module to focus width and height information into channel space with configurable
convolution parameters.
@@ -322,7 +338,8 @@ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)


class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
"""Implements Ghost Convolution for efficient feature extraction; see github.com/huawei-noah/ghostnet."""

def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
"""Initializes GhostConv with in/out channels, kernel size, stride, groups; see
https://github.com/huawei-noah/ghostnet.
@@ -339,7 +356,8 @@ def forward(self, x):


class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
"""Implements a Ghost Bottleneck layer for efficient feature extraction from GhostNet."""

def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
"""Initializes GhostBottleneck module with in/out channels, kernel size, and stride; see
https://github.com/huawei-noah/ghostnet.
@@ -361,7 +379,8 @@ def forward(self, x):


class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
"""Contracts spatial dimensions into channels, e.g., (1,64,80,80) to (1,256,40,40) with a specified gain."""

def __init__(self, gain=2):
"""Initializes Contract module to refine input dimensions, e.g., from (1,64,80,80) to (1,256,40,40) with a
default gain of 2.
@@ -381,7 +400,8 @@ def forward(self, x):


class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
"""Expands spatial dimensions of input tensor by a factor while reducing channels correspondingly."""

def __init__(self, gain=2):
"""Initializes Expand module to increase spatial dimensions by factor `gain` while reducing channels
correspondingly.
@@ -401,7 +421,8 @@ def forward(self, x):


class Concat(nn.Module):
# Concatenate a list of tensors along dimension
"""Concatenates a list of tensors along a specified dimension for efficient feature aggregation."""

def __init__(self, dimension=1):
"""Initializes a module to concatenate tensors along a specified dimension."""
super().__init__()
@@ -415,7 +436,8 @@ def forward(self, x):


class DetectMultiBackend(nn.Module):
# YOLOv3 MultiBackend class for python inference on various backends
"""YOLOv3 multi-backend class for inference on frameworks like PyTorch, ONNX, TensorRT, and more."""

def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
"""Initializes multi-backend detection with options for various frameworks and devices, also handles model
download.
@@ -749,7 +771,8 @@ def _load_metadata(f=Path("path/to/meta.yaml")):


class AutoShape(nn.Module):
# YOLOv3 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
"""A wrapper for YOLOv3 models to handle diverse input types with preprocessing, inference, and NMS."""

conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
@@ -857,7 +880,8 @@ def forward(self, ims, size=640, augment=False, profile=False):


class Detections:
# YOLOv3 detections class for inference results
"""Handles YOLOv3 detection results with methods for visualization, saving, cropping, and format conversion."""

def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
"""Initializes YOLOv3 detections with image data, predictions, filenames, profiling times, class names, and
shapes.
@@ -1011,7 +1035,8 @@ def __repr__(self):


class Proto(nn.Module):
# YOLOv3 mask Proto module for segmentation models
"""Implements the YOLOv3 mask Proto module for segmentation, including convolutional layers and upsampling."""

def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
"""Initializes the Proto module for YOLOv3 segmentation, setting up convolutional layers and upsampling."""
super().__init__()
Expand All @@ -1026,7 +1051,8 @@ def forward(self, x):


class Classify(nn.Module):
# YOLOv3 classification head, i.e. x(b,c1,20,20) to x(b,c2)
"""Performs image classification using YOLOv3-based architecture with convolutional, pooling, and dropout layers."""

def __init__(
self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0
): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
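
Among the classes documented above, SPPF carries an inline note that SPPF(k=5) is equivalent to SPP(k=(5, 9, 13)). A quick sanity check of that claim, assuming it is run from a repository checkout so that models.common is importable:

```python
# Sanity check of the "SPPF(k=5) equals SPP(k=(5, 9, 13))" comment above.
# Assumes a repo checkout where models.common is importable.
import torch

from models.common import SPP, SPPF

c1, c2 = 64, 64
spp, sppf = SPP(c1, c2, k=(5, 9, 13)), SPPF(c1, c2, k=5)
sppf.load_state_dict(spp.state_dict())  # both modules only hold cv1/cv2 weights
spp.eval()
sppf.eval()

x = torch.randn(2, c1, 40, 40)
with torch.no_grad():
    assert torch.allclose(spp(x), sppf(x), atol=1e-6), "SPP and SPPF outputs should match"
```

Three chained 5x5 max pools with stride 1 cover the same receptive fields as the separate 5, 9, and 13 pools in SPP, which is why the fast variant reproduces SPP's output while reusing intermediate pooling results.
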
9 changes: 6 additions & 3 deletions models/experimental.py
@@ -11,7 +11,8 @@


class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
"""Computes the weighted or unweighted sum of multiple input layers per https://arxiv.org/abs/1911.09070."""

def __init__(self, n, weight=False): # n: number of inputs
"""
Initializes a module to compute weighted/unweighted sum of n inputs, with optional learning weights.
@@ -42,7 +43,8 @@ def forward(self, x):


class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
"""Implements mixed depth-wise convolutions for efficient neural networks; see https://arxiv.org/abs/1907.09595."""

def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
"""Initializes MixConv2d with mixed depth-wise convolution layers; details at
https://arxiv.org/abs/1907.09595.
@@ -72,7 +74,8 @@ def forward(self, x):


class Ensemble(nn.ModuleList):
# Ensemble of models
"""Combines outputs from multiple models to improve inference results."""

def __init__(self):
"""Initializes an ensemble of models to combine their outputs."""
super().__init__()
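
Ensemble above is documented as combining outputs from multiple models at inference time. A minimal sketch of one common approach, concatenating each member's raw detections so a single downstream NMS pass fuses them, shown for illustration rather than as the exact forward used in experimental.py:

```python
# Illustrative ensemble that concatenates each member's raw detections so a single
# downstream NMS pass fuses the candidates; not claimed to be the exact repo code.
import torch
import torch.nn as nn


class SimpleEnsemble(nn.ModuleList):  # hypothetical name
    def forward(self, x):
        outputs = [model(x)[0] for model in self]  # assumed per-model shape: (batch, n_i, no)
        return torch.cat(outputs, dim=1)           # combined candidates: (batch, sum(n_i), no)


# Usage sketch: ensemble = SimpleEnsemble([model_a, model_b]); preds = ensemble(imgs)
```
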
(Diffs for the remaining 15 changed files are not shown.)
