diff --git a/MANIFEST.in b/MANIFEST.in index cdfecc0..14c65d7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,9 @@ include pyvision/detection/detr/utils/pallete include pyvision/detection/detr/config/*.json include pyvision/detection/detr/data/*.txt +include pyvision/detection/efficientdet/config/*.json +include pyvision/detection/efficientdet/config/*.yaml + include pyvision/segmentation/pspnet/config/*.json include pyvision/segmentation/pspnet/data/*.txt diff --git a/pyvision/detection/efficientdet/__init__.py b/pyvision/detection/efficientdet/__init__.py new file mode 100644 index 0000000..59e8c59 --- /dev/null +++ b/pyvision/detection/efficientdet/__init__.py @@ -0,0 +1 @@ +from .model import EffdetInferAPI as EfficientDet \ No newline at end of file diff --git a/pyvision/detection/efficientdet/config/__init__.py b/pyvision/detection/efficientdet/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyvision/detection/efficientdet/config/dataset_coco.yaml b/pyvision/detection/efficientdet/config/dataset_coco.yaml new file mode 100644 index 0000000..c8b61e2 --- /dev/null +++ b/pyvision/detection/efficientdet/config/dataset_coco.yaml @@ -0,0 +1,28 @@ +class_list : ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", + "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", + "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", + "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", + "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", + "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", + "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", + "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush"] + +model_name: "effdet_coco" + +colors : [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), (14, 89, 122), + (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, 132), (169, 158, 85), (188, 185, 26), (103, 1, 17), + (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), + (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), + (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), + (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), + (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), + (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), + (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), + (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), + (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), + (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), + (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), + (81, 175, 64), 
(178, 19, 157), (74, 188, 190), (18, 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), + (2, 20, 184), (122, 37, 185)] \ No newline at end of file diff --git a/pyvision/detection/efficientdet/config/weights_download.json b/pyvision/detection/efficientdet/config/weights_download.json new file mode 100644 index 0000000..4a208c7 --- /dev/null +++ b/pyvision/detection/efficientdet/config/weights_download.json @@ -0,0 +1,4 @@ + +{ + "effdet_coco": "1jvcGIWyZ3jjTltiErp-OPNTA7SLWlslR" +} diff --git a/pyvision/detection/efficientdet/lib/__init__.py b/pyvision/detection/efficientdet/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyvision/detection/efficientdet/lib/dataset.py b/pyvision/detection/efficientdet/lib/dataset.py new file mode 100644 index 0000000..c891e90 --- /dev/null +++ b/pyvision/detection/efficientdet/lib/dataset.py @@ -0,0 +1,200 @@ +import os +import torch +import numpy as np + +from torch.utils.data import Dataset, DataLoader +from pycocotools.coco import COCO + +import cv2 + +class CustomDataset(Dataset): + + def __init__(self, root_dir, img_dir="images", set_name="train2017", transform=None): + + self.root_dir = root_dir + self.img_dir = img_dir + self.set_name = set_name + self.transform = transform + + self.coco_tool = COCO(os.path.join(self.root_dir, 'annotations', 'instances_'+self.set_name+'.json')) + self.image_ids = self.coco_tool.getImgIds() + + self.load_classes() + + def load_classes(self): + + categories = self.coco_tool.loadCats(self.coco_tool.getCatIds()) + categories.sort(key = lambda x: x["id"]) + + # load name -> label + self.classes = {} + self.coco_labels = {} + self.coco_labels_inverse = {} + for category in categories: + self.coco_labels[len(self.classes)] = category['id'] + self.coco_labels_inverse[category['id']] = len(self.classes) + self.classes[category['name']] = len(self.classes) + + # load label -> name + self.labels = {} + for key, value in self.classes.items(): + self.labels[value] = key + + + def load_image(self, idx): + + img_info = self.coco_tool.loadImgs(self.image_ids[idx])[0] + img_path = os.path.join( + self.root_dir, self.img_dir, self.set_name, img_info['file_name'] + ) + img = cv2.imread(img_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + img = img.astype(np.float32) / 255.0 + + return img + + def coco_label_to_label(self, coco_label): + return self.coco_labels_inverse[coco_label] + + def label_to_coco_label(self, label): + return self.coco_labels[label] + + def num_classes(self): + return len(self.classes) + + def load_annotations(self, idx): + + anno_ids = self.coco_tool.getAnnIds( + imgIds=self.image_ids[idx], iscrowd=False + ) + annotations = np.zeros((0, 5)) + + # if some images miss annotations + if len(anno_ids) == 0: + return annotations + + # parsing the annotations here + coco_annotations = self.coco_tool.loadAnns(anno_ids) + for idx, a in enumerate(coco_annotations): + + # skip the annotations that have no height/width + if a['bbox'][2] < 1 or a['bbox'][3] < 1: + continue + + annotation = np.zeros((1, 5)) + annotation[0, :4] = a['bbox'] + annotation[0, 4] = self.coco_label_to_label(a['category_id']) + annotations = np.append(annotations, annotation, axis=0) + + # transform [x, y, w, h] -> [x1, y1, x2, y2] + annotations[:, 2] = annotations[:, 0] + annotations[:, 2] + annotations[:, 3] = annotations[:, 1] + annotations[:, 3] + + return annotations + + + def __len__(self): + return len(self.image_ids) + + + def __getitem__(self, idx): + + img = self.load_image(idx) + annot = 
self.load_annotations(idx) + + data = { + "img": img, + "annot": annot + } + + if self.transform: + data = self.transform(data) + + return data + + +def collater(data): + imgs = [s['img'] for s in data] + annots = [s['annot'] for s in data] + scales = [s['scale'] for s in data] + + imgs = torch.from_numpy(np.stack(imgs, axis=0)) + + max_num_annots = max(annot.shape[0] for annot in annots) + + if max_num_annots > 0: + + annot_padded = torch.ones((len(annots), max_num_annots, 5)) * -1 + + if max_num_annots > 0: + for idx, annot in enumerate(annots): + if annot.shape[0] > 0: + annot_padded[idx, :annot.shape[0], :] = annot + else: + annot_padded = torch.ones((len(annots), 1, 5)) * -1 + + imgs = imgs.permute(0, 3, 1, 2) + + return {'img': imgs, 'annot': annot_padded, 'scale': scales} + + +class Resizer(object): + """Convert ndarrays in sample to Tensors.""" + + def __call__(self, sample, common_size=512): + image, annots = sample['img'], sample['annot'] + height, width, _ = image.shape + if height > width: + scale = common_size / height + resized_height = common_size + resized_width = int(width * scale) + else: + scale = common_size / width + resized_height = int(height * scale) + resized_width = common_size + + image = cv2.resize(image, (resized_width, resized_height)) + + new_image = np.zeros((common_size, common_size, 3)) + new_image[0:resized_height, 0:resized_width] = image + + annots[:, :4] *= scale + + return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} + + +class Augmenter(object): + """Convert ndarrays in sample to Tensors.""" + + def __call__(self, sample, flip_x=0.5): + if np.random.rand() < flip_x: + image, annots = sample['img'], sample['annot'] + image = image[:, ::-1, :] + + rows, cols, channels = image.shape + + x1 = annots[:, 0].copy() + x2 = annots[:, 2].copy() + + x_tmp = x1.copy() + + annots[:, 0] = cols - x2 + annots[:, 2] = cols - x_tmp + + sample = {'img': image, 'annot': annots} + + return sample + + +class Normalizer(object): + + def __init__(self): + self.mean = np.array([[[0.485, 0.456, 0.406]]]) + self.std = np.array([[[0.229, 0.224, 0.225]]]) + + def __call__(self, sample): + image, annots = sample['img'], sample['annot'] + + return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots} + diff --git a/pyvision/detection/efficientdet/lib/losses.py b/pyvision/detection/efficientdet/lib/losses.py new file mode 100644 index 0000000..9781b44 --- /dev/null +++ b/pyvision/detection/efficientdet/lib/losses.py @@ -0,0 +1,292 @@ +import torch +import torch.nn as nn + + +def iou(a, b): + + area_a = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) + + iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], dim=1), b[:, 0]) + ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], dim=1), b[:, 1]) + iw = torch.clamp(iw, min=0) + ih = torch.clamp(ih, min=0) + + inter_area = iw * ih + union_area = area_a + area_b - inter_area + + union_area = torch.clamp(union_area, min=1e-8) + iou_score = inter_area / union_area + + return iou_score + +""" +class FocalLoss(nn.Module): + + def __init__(self, alpha=0.25, gamma=2, device="cuda"): + + super(FocalLoss, self).__init__() + + self.alpha = alpha + self.gamma = gamma + self.device = device + + def forward(self, classifications, regressions, anchors, annotations): + + batch_size = classifications.shape[0] + + classification_loss = 
[] + regression_loss = [] + + anchor = anchors[0, :, :] + + anchor_widths = anchor[:, 2] - anchor[:, 0] + anchor_heights = anchor[:, 3] - anchor[:, 1] + anchor_x = anchor[:, 0] + 0.5 * anchor_widths + anchor_y = anchor[:, 1] + 0.5 * anchor_heights + + for i in range(batch_size): + + classification = classifications[i, :, :] + regression = regressions[i, :, :] + + box_annotation = annotations[i, :, :] + box_annotation = box_annotation[box_annotation[:, 4] != -1] + + if box_annotation.shape[0] == 0: + if self.device == "cuda" and torch.cuda.is_available(): + regression_loss.append(torch.tensor(0).float().cuda()) + classification_loss.append(torch.tensor(0).float().cuda()) + else: + regression_loss.append(torch.tensor(0).float().cuda()) + classification_loss.append(torch.tensor(0).float().cuda()) + + # no loss or no det. Move on to the next item + continue + + classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) + + iou_score = iou(anchors[0, :, :], box_annotation[:, :4]) + iou_max, iou_argmax = torch.max(iou_score, dim=1) + + targets = torch.ones(classification.shape) * -1 + if self.device == "cuda" and torch.cuda.is_available(): + targets = targets.cuda() + + # zeroing out the indices with IOU less than 0.4 + targets[torch.lt(iou_max, 0.4), :] = 0 + + # getting the indices with IoU score > 0.5 + positive_idx = torch.ge(iou_max, 0.5) + num_positive_idx = positive_idx.sum() + + assigned_annots = box_annotation[iou_argmax, :] + + targets[positive_idx, :] = 0 + targets[positive_idx, assigned_annots[positive_idx, 4].long()] = 1 + + alpha_factor = torch.ones(targets.shape) * self.alpha + if self.device == "cuda" and torch.cuda.is_available(): + alpha_factor = alpha_factor.cuda() + + alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1.0 - alpha_factor) + focal_weight = torch.where(torch.eq(targets, 1.), 1.0 - classification, classification) + focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma) + + bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) + cls_loss = alpha_factor * bce + + zeros = torch.zeros(cls_loss.shape) + if self.device == "cuda" and torch.cuda.is_available(): + zeros = zeros.cuda() + cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros) + + classification_loss.append( + cls_loss.sum() / torch.clamp(num_positive_idx.float(), min=1.0) + ) + + # implement regression loss + if num_positive_idx > 0: + + assigned_annots = assigned_annots[positive_idx, :] + + anchor_widths_i = anchor_widths[positive_idx] + anchor_heights_i = anchor_heights[positive_idx] + anchor_xi = anchor_x[positive_idx] + anchor_yi = anchor_y[positive_idx] + + true_widths = assigned_annots[:, 2] - assigned_annots[:, 0] + true_heights = assigned_annots[:, 3] - assigned_annots[:, 1] + true_x = assigned_annots[:, 0] + 0.5 * true_widths + true_y = assigned_annots[:, 1] + 0.5 * true_heights + + true_heights = torch.clamp(true_heights, min=1) + true_widths = torch.clamp(true_widths, min=1) + + targets_dx = (true_x - anchor_xi) / anchor_widths_i + targets_dy = (true_y - anchor_yi) / anchor_heights_i + targets_dw = torch.log(true_widths / anchor_widths_i) + targets_dh = torch.log(true_heights / anchor_heights_i) + + targets = torch.stack(( + targets_dx, targets_dy, targets_dw, targets_dh + )) + targets = targets.t() + + norm = torch.Tensor([0.1, 0.1, 0.2, 0.2]) + if self.device == "cuda" and torch.cuda.is_available(): + norm = norm.cuda() + targets = targets / norm + + regression_diff = torch.abs(targets - regression[positive_idx, 
:]) + regression_loss_i = torch.where( + torch.le(regression_diff, 1.0/9.0), + 0.5 * 9.0 * torch.pow(regression_diff, 2), + regression_diff - 0.5 / 9.0 + ) + regression_loss.append(regression_loss_i.mean()) + + else: + + if self.device == "cuda" and torch.cuda.is_available(): + regression_loss.append(torch.tensor(0).float().cuda()) + else: + regression_loss.append(torch.tensor(0).float()) + + + return_cls_loss = torch.stack(classification_loss).mean(dim=0, keepdim=True) + return_reg_loss = torch.stack(regression_loss).mean(dim=0, keepdim=True) + + return return_cls_loss, return_reg_loss + +""" + +class FocalLoss(nn.Module): + def __init__(self, alpha=0.25, gamma=2, device="cuda"): + + super(FocalLoss, self).__init__() + + self.alpha = alpha + self.gamma = gamma + self.device = device + + def forward(self, classifications, regressions, anchors, annotations): + + batch_size = classifications.shape[0] + classification_losses = [] + regression_losses = [] + + anchor = anchors[0, :, :] + + anchor_widths = anchor[:, 2] - anchor[:, 0] + anchor_heights = anchor[:, 3] - anchor[:, 1] + anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths + anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights + + for j in range(batch_size): + + classification = classifications[j, :, :] + regression = regressions[j, :, :] + + bbox_annotation = annotations[j, :, :] + bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] + + if bbox_annotation.shape[0] == 0: + if self.device == "cuda" and torch.cuda.is_available(): + regression_losses.append(torch.tensor(0).float().cuda()) + classification_losses.append(torch.tensor(0).float().cuda()) + else: + regression_losses.append(torch.tensor(0).float()) + classification_losses.append(torch.tensor(0).float()) + + continue + + classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) + + IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) + + IoU_max, IoU_argmax = torch.max(IoU, dim=1) + + # compute the loss for classification + targets = torch.ones(classification.shape) * -1 + if self.device == "cuda" and torch.cuda.is_available(): + targets = targets.cuda() + + targets[torch.lt(IoU_max, 0.4), :] = 0 + + positive_indices = torch.ge(IoU_max, 0.5) + + num_positive_anchors = positive_indices.sum() + + assigned_annotations = bbox_annotation[IoU_argmax, :] + + targets[positive_indices, :] = 0 + targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1 + + alpha_factor = torch.ones(targets.shape) * self.alpha + if self.device == "cuda" and torch.cuda.is_available(): + alpha_factor = alpha_factor.cuda() + + alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor) + focal_weight = torch.where(torch.eq(targets, 1.), 1. 
- classification, classification) + focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma) + + bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification)) + + cls_loss = focal_weight * bce + + zeros = torch.zeros(cls_loss.shape) + if self.device == "cuda" and torch.cuda.is_available(): + zeros = zeros.cuda() + cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros) + + classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0)) + + + if positive_indices.sum() > 0: + assigned_annotations = assigned_annotations[positive_indices, :] + + anchor_widths_pi = anchor_widths[positive_indices] + anchor_heights_pi = anchor_heights[positive_indices] + anchor_ctr_x_pi = anchor_ctr_x[positive_indices] + anchor_ctr_y_pi = anchor_ctr_y[positive_indices] + + gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0] + gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1] + gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths + gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights + + gt_widths = torch.clamp(gt_widths, min=1) + gt_heights = torch.clamp(gt_heights, min=1) + + targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi + targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi + targets_dw = torch.log(gt_widths / anchor_widths_pi) + targets_dh = torch.log(gt_heights / anchor_heights_pi) + + targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh)) + targets = targets.t() + + norm = torch.Tensor([[0.1, 0.1, 0.2, 0.2]]) + if self.device == "cuda" and torch.cuda.is_available(): + norm = norm.cuda() + targets = targets / norm + + regression_diff = torch.abs(targets - regression[positive_indices, :]) + + regression_loss = torch.where( + torch.le(regression_diff, 1.0 / 9.0), + 0.5 * 9.0 * torch.pow(regression_diff, 2), + regression_diff - 0.5 / 9.0 + ) + regression_losses.append(regression_loss.mean()) + else: + if self.device == "cuda" and torch.cuda.is_available(): + regression_losses.append(torch.tensor(0).float().cuda()) + else: + regression_losses.append(torch.tensor(0).float()) + + return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, + keepdim=True) + + diff --git a/pyvision/detection/efficientdet/lib/model.py b/pyvision/detection/efficientdet/lib/model.py new file mode 100644 index 0000000..509edca --- /dev/null +++ b/pyvision/detection/efficientdet/lib/model.py @@ -0,0 +1,400 @@ +import torch.nn as nn +import torch + +import math + +from efficientnet_pytorch import EfficientNet as EffNet +from .utils import BBoxTransform, ClipBoxes, Anchors +from .losses import FocalLoss +from torchvision.ops.boxes import nms as torch_nms + +def nms(dets, thresh): + return torch_nms( + dets[:, :4], dets[:, 4], thresh + ) + +class ConvBlock(nn.Module): + + def __init__(self, num_channels): + + super(ConvBlock, self).__init__() + + self.conv = nn.Sequential( + nn.Conv2d(num_channels, num_channels, kernel_size=3, stride=1, padding=1, groups=num_channels), + nn.Conv2d(num_channels, num_channels, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(num_features=num_channels, momentum=0.9997, eps=4e-5), + nn.ReLU() + ) + + def forward(self, x): + + return self.conv(x) + +class BiFPN(nn.Module): + + def __init__(self, num_channels, eps=1e-4): + + super(BiFPN, self).__init__() + + self.eps = eps + + # Here, we define the various conv layers + self.conv6_up = ConvBlock(num_channels) + self.conv5_up = 
ConvBlock(num_channels) + self.conv4_up = ConvBlock(num_channels) + self.conv3_up = ConvBlock(num_channels) + self.conv4_down = ConvBlock(num_channels) + self.conv5_down = ConvBlock(num_channels) + self.conv6_down = ConvBlock(num_channels) + self.conv7_down = ConvBlock(num_channels) + + # Feature scaling layers + self.p6_upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.p5_upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.p4_upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.p3_upsample = nn.Upsample(scale_factor=2, mode='nearest') + + self.p4_downsample = nn.MaxPool2d(kernel_size=2) + self.p5_downsample = nn.MaxPool2d(kernel_size=2) + self.p6_downsample = nn.MaxPool2d(kernel_size=2) + self.p7_downsample = nn.MaxPool2d(kernel_size=2) + + # Weight + self.p6_w1 = nn.Parameter(torch.ones(2)) + self.p6_w1_relu = nn.ReLU() + self.p5_w1 = nn.Parameter(torch.ones(2)) + self.p5_w1_relu = nn.ReLU() + self.p4_w1 = nn.Parameter(torch.ones(2)) + self.p4_w1_relu = nn.ReLU() + self.p3_w1 = nn.Parameter(torch.ones(2)) + self.p3_w1_relu = nn.ReLU() + + self.p4_w2 = nn.Parameter(torch.ones(3)) + self.p4_w2_relu = nn.ReLU() + self.p5_w2 = nn.Parameter(torch.ones(3)) + self.p5_w2_relu = nn.ReLU() + self.p6_w2 = nn.Parameter(torch.ones(3)) + self.p6_w2_relu = nn.ReLU() + self.p7_w2 = nn.Parameter(torch.ones(2)) + self.p7_w2_relu = nn.ReLU() + + + def forward(self, inputs): + """ + P7_0 -------------------------- P7_2 --------> + P6_0 ---------- P6_1 ---------- P6_2 --------> + P5_0 ---------- P5_1 ---------- P5_2 --------> + P4_0 ---------- P4_1 ---------- P4_2 --------> + P3_0 -------------------------- P3_2 --------> + """ + + # P3_0, P4_0, P5_0, P6_0 and P7_0 + p3_in, p4_in, p5_in, p6_in, p7_in = inputs[0], inputs[1], inputs[2], inputs[3], inputs[4] + + # P7_0 to P7_2 + # Weights for P6_0 and P7_0 to P6_1 + p6_w1 = self.p6_w1_relu(self.p6_w1) + weight = p6_w1 / (torch.sum(p6_w1, dim=0) + self.eps) + + # Connections for P6_0 and P7_0 to P6_1 respectively + p6_up = self.conv6_up(weight[0] * p6_in + weight[1] * self.p6_upsample(p7_in)) + + # Weights for P5_0 and P6_0 to P5_1 + p5_w1 = self.p5_w1_relu(self.p5_w1) + weight = p5_w1 / (torch.sum(p5_w1, dim=0) + self.eps) + + # Connections for P5_0 and P6_0 to P5_1 respectively + p5_up = self.conv5_up(weight[0] * p5_in + weight[1] * self.p5_upsample(p6_up)) + + # Weights for P4_0 and P5_0 to P4_1 + p4_w1 = self.p4_w1_relu(self.p4_w1) + weight = p4_w1 / (torch.sum(p4_w1, dim=0) + self.eps) + + # Connections for P4_0 and P5_0 to P4_1 respectively + p4_up = self.conv4_up(weight[0] * p4_in + weight[1] * self.p4_upsample(p5_up)) + + # Weights for P3_0 and P4_1 to P3_2 + p3_w1 = self.p3_w1_relu(self.p3_w1) + weight = p3_w1 / (torch.sum(p3_w1, dim=0) + self.eps) + + # Connections for P3_0 and P4_1 to P3_2 respectively + p3_out = self.conv3_up(weight[0] * p3_in + weight[1] * self.p3_upsample(p4_up)) + + # Weights for P4_0, P4_1 and P3_2 to P4_2 + p4_w2 = self.p4_w2_relu(self.p4_w2) + weight = p4_w2 / (torch.sum(p4_w2, dim=0) + self.eps) + + # Connections for P4_0, P4_1 and P3_2 to P4_2 respectively + p4_out = self.conv4_down( + weight[0] * p4_in + weight[1] * p4_up + weight[2] * self.p4_downsample(p3_out)) + + # Weights for P5_0, P5_1 and P4_2 to P5_2 + p5_w2 = self.p5_w2_relu(self.p5_w2) + weight = p5_w2 / (torch.sum(p5_w2, dim=0) + self.eps) + + # Connections for P5_0, P5_1 and P4_2 to P5_2 respectively + p5_out = self.conv5_down( + weight[0] * p5_in + weight[1] * p5_up + weight[2] * self.p5_downsample(p4_out)) + + # Weights for P6_0, 
P6_1 and P5_2 to P6_2 + p6_w2 = self.p6_w2_relu(self.p6_w2) + weight = p6_w2 / (torch.sum(p6_w2, dim=0) + self.eps) + + # Connections for P6_0, P6_1 and P5_2 to P6_2 respectively + p6_out = self.conv6_down( + weight[0] * p6_in + weight[1] * p6_up + weight[2] * self.p6_downsample(p5_out)) + + # Weights for P7_0 and P6_2 to P7_2 + p7_w2 = self.p7_w2_relu(self.p7_w2) + weight = p7_w2 / (torch.sum(p7_w2, dim=0) + self.eps) + + # Connections for P7_0 and P6_2 to P7_2 + p7_out = self.conv7_down(weight[0] * p7_in + weight[1] * self.p7_downsample(p6_out)) + + return p3_out, p4_out, p5_out, p6_out, p7_out + + +class Regressor(nn.Module): + + def __init__(self, in_channels, num_anchors, num_layers): + + super(Regressor, self).__init__() + + layers = [] + for _ in range(num_layers): + layers.append(nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)) + layers.append(nn.ReLU(True)) + + self.layers = nn.Sequential(*layers) + self.header = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1) + + def forward(self, inputs): + + inputs = self.layers(inputs) + inputs = self.header(inputs) + output = inputs.permute(0, 2, 3, 1) + + return output.contiguous().view(output.shape[0], -1, 4) + +class Classifier(nn.Module): + + def __init__(self, in_channels, num_anchors, num_classes, num_layers): + + super(Classifier, self).__init__() + + self.num_anchors = num_anchors + self.num_classes = num_classes + + layers = [] + for _ in range(num_layers): + layers.append( + nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) + ) + layers.append( + nn.ReLU(True) + ) + + self.layers = nn.Sequential(*layers) + self.header = nn.Conv2d(in_channels, num_anchors*num_classes, kernel_size=3, stride=1, padding=1) + self.activation = nn.Sigmoid() + + def forward(self, x): + + x = self.layers(x) + x = self.header(x) + x = self.activation(x) + + x = x.permute(0, 2, 3, 1) + + output = x.contiguous().view( + x.shape[0], x.shape[1], x.shape[2], self.num_anchors, self.num_classes + ) + + return output.contiguous().view( + output.shape[0], -1, self.num_classes + ) + + +class EfficientNet(nn.Module): + + def __init__(self, model_coeff=0, pretrained=False): + + super(EfficientNet, self).__init__() + + if model_coeff not in [0, 1, 2, 3, 4, 5, 6, 7]: + raise ValueError(f"{model_coeff} not a valid model. 
Models supported are b0, b1, b2, b3, b4, b5, b6, b7") + model_version = f"efficientnet-b{model_coeff}" + + if pretrained: + model = EffNet.from_pretrained(model_version) # change to local load later + else: + model = EffNet.from_name(model_version) + + del model._conv_head + del model._bn1 + del model._avg_pooling + del model._dropout + del model._fc + + self.model = model + + def forward(self, x): + + x = self.model._swish(self.model._bn0(self.model._conv_stem(x))) + + feature_maps = [] + for idx, block in enumerate(self.model._blocks): + + drop_connect_rate = self.model._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self.model._blocks) + + x = block(x, drop_connect_rate=drop_connect_rate) + + if block._depthwise_conv.stride == [2, 2]: + feature_maps.append(x) + + return feature_maps[1:] + + +class EfficientDet(nn.Module): + + def __init__(self, num_anchors=9, num_classes=20, model_coeff=0, focal_alpha=0.25, focal_gamma=2, pretrained=False, device="cuda"): + + super(EfficientDet, self).__init__() + + self.model_coeff = model_coeff + self.num_classes = num_classes + self.num_anchors = num_anchors + self.focal_alpha = focal_alpha + self.focal_gamma = focal_gamma + self.device = device + + self.num_channels = [64, 88, 112, 160, 224, 288, 384, 384][self.model_coeff] + + # model specific conv layer configurations + in_channels = [ + (40, 80, 192, 192), #b0 + (40, 80, 192, 192), #b1 + (48, 88, 208, 208), #b2 + (48, 96, 232, 232), #b3 + (56, 112, 272, 272), #b4 + (64, 128, 304, 304), #b5 + (72, 144, 344, 344), #b6 + (80, 160, 384, 384) #b7 + ] + + self.conv3 = nn.Conv2d(in_channels[self.model_coeff][0], self.num_channels, kernel_size=1, stride=1, padding=0) + self.conv4 = nn.Conv2d(in_channels[self.model_coeff][1], self.num_channels, kernel_size=1, stride=1, padding=0) + self.conv5 = nn.Conv2d(in_channels[self.model_coeff][2], self.num_channels, kernel_size=1, stride=1, padding=0) + self.conv6 = nn.Conv2d(in_channels[self.model_coeff][3], self.num_channels, kernel_size=3, stride=2, padding=1) + self.conv7 = nn.Sequential( + nn.ReLU(), + nn.Conv2d(self.num_channels, self.num_channels, kernel_size=3, stride=2, padding=1) + ) + + self.bifpn = nn.Sequential(*[BiFPN(self.num_channels) for _ in range(min(2+self.model_coeff, 8))]) + + self.regressor = Regressor( + in_channels=self.num_channels, + num_anchors=self.num_anchors, + num_layers=3 + self.model_coeff // 3 + ) + + self.classifier = Classifier( + in_channels=self.num_channels, + num_anchors=self.num_anchors, + num_classes=self.num_classes, + num_layers=3 + self.model_coeff // 3 + ) + + self.anchors = Anchors() + self.regressBoxes = BBoxTransform() + self.clipBoxes = ClipBoxes() + self.focalloss = FocalLoss(alpha=self.focal_alpha, gamma=self.focal_gamma, device=self.device) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + prior = 0.01 + self.classifier.header.weight.data.fill_(0) + self.classifier.header.bias.data.fill_(-math.log((1.0 - prior) / prior)) + + self.regressor.header.weight.data.fill_(0) + self.regressor.header.bias.data.fill_(0) + + self.backbone_net = EfficientNet(model_coeff=self.model_coeff, pretrained=pretrained) + + + def freeze_bn(self): + + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + + def forward(self, x): + + if len(x) == 2: + is_training = True + imgs, annots = x + else: + is_training = False + imgs = x + + c3, c4, c5 = self.backbone_net(imgs) + p3 = self.conv3(c3) + p4 = self.conv4(c4) + p5 = self.conv5(c5) + p6 = self.conv6(c5) + p7 = self.conv7(p6) + + features = [p3, p4, p5, p6, p7] + features = self.bifpn(features) + + regression = torch.cat([self.regressor(feature) for feature in features], dim=1) + classification = torch.cat([self.classifier(feature) for feature in features], dim=1) + anchors = self.anchors(imgs) + + if is_training: + return self.focalloss(classification, regression, anchors, annots) + else: + + transformed_anchors = self.regressBoxes(anchors, regression) + transformed_anchors = self.clipBoxes(transformed_anchors, imgs) + + scores = torch.max(classification, dim=2, keepdim=True)[0] + + scores_gt_thresh = (scores > 0.05)[0, :, 0] + if scores_gt_thresh.sum() == 0: + return [ + torch.zeros(0), + torch.zeros(0), + torch.zeros(0, 4) + ] + + classification = classification[:, scores_gt_thresh, :] + transformed_anchors = transformed_anchors[:, scores_gt_thresh, :] + scores = scores[:, scores_gt_thresh, :] + + anchors_nms_idx = nms(torch.cat([transformed_anchors, scores], dim=2)[0, :, :], 0.5) + + nms_scores, nms_classes = classification[0, anchors_nms_idx, :].max(dim=1) + + return [nms_scores, nms_classes, transformed_anchors[0, anchors_nms_idx, :]] + + + +if __name__ == '__main__': + + def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + + model = EfficientDet(num_classes=80) + print (count_parameters(model)) + + diff --git a/pyvision/detection/efficientdet/lib/utils.py b/pyvision/detection/efficientdet/lib/utils.py new file mode 100644 index 0000000..4768423 --- /dev/null +++ b/pyvision/detection/efficientdet/lib/utils.py @@ -0,0 +1,173 @@ +import torch +import torch.nn as nn +import numpy as np + +class BBoxTransform(nn.Module): + + def __init__(self, mean=None, std=None, gpu=False): + + super(BBoxTransform, self).__init__() + + if mean is None: + self.mean = torch.from_numpy(np.array([0, 0, 0, 0]).astype(np.float32)) + else: + self.mean = mean + if std is None: + self.std = torch.from_numpy(np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32)) + else: + self.std = std + if gpu and torch.cuda.is_available(): + self.mean = self.mean.cuda() + self.std = self.std.cuda() + + def forward(self, boxes, deltas): + + widths = boxes[:, :, 2] - boxes[:, :, 0] + heights = boxes[:, :, 3] - boxes[:, :, 1] + ctrx = boxes[:, :, 0] + 0.5 * widths + ctry = boxes[:, :, 1] + 0.5 * heights + + dx = deltas[:, :, 0] * self.std[0] + self.mean[0] + dy = deltas[:, :, 1] * self.std[1] + self.mean[1] + dw = deltas[:, :, 2] * self.std[2] + self.mean[2] + dh = deltas[:, :, 3] * self.std[3] + self.mean[3] + + pred_x = ctrx + dx * widths + pred_y = ctry + dy * heights + pred_w = torch.exp(dw) * widths + pred_h = torch.exp(dh) * heights + + pred_boxes_x1 = pred_x - 0.5 * pred_w + pred_boxes_y1 = pred_y - 0.5 * pred_h + 
pred_boxes_x2 = pred_x + 0.5 * pred_w + pred_boxes_y2 = pred_y + 0.5 * pred_h + + pred_boxes = torch.stack([ + pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2 + ], dim=2) + + return pred_boxes + + +class ClipBoxes(nn.Module): + + def __init__(self): + super(ClipBoxes, self).__init__() + + def forward(self, boxes, img): + + batch_size, num_channels, height, width = img.shape + + boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) + boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) + + boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width) + boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height) + + return boxes + +class Anchors(nn.Module): + def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None): + super(Anchors, self).__init__() + + if pyramid_levels is None: + self.pyramid_levels = [3, 4, 5, 6, 7] + if strides is None: + self.strides = [2 ** x for x in self.pyramid_levels] + if sizes is None: + self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] + if ratios is None: + self.ratios = np.array([0.5, 1, 2]) + if scales is None: + self.scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) + + def forward(self, image): + + image_shape = image.shape[2:] + image_shape = np.array(image_shape) + image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels] + + all_anchors = np.zeros((0, 4)).astype(np.float32) + + for idx, p in enumerate(self.pyramid_levels): + anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales) + shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors) + all_anchors = np.append(all_anchors, shifted_anchors, axis=0) + + all_anchors = np.expand_dims(all_anchors, axis=0) + + anchors = torch.from_numpy(all_anchors.astype(np.float32)) + if torch.cuda.is_available(): + anchors = anchors.cuda() + return anchors + + +def generate_anchors(base_size=16, ratios=None, scales=None): + if ratios is None: + ratios = np.array([0.5, 1, 2]) + + if scales is None: + scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) + + num_anchors = len(ratios) * len(scales) + anchors = np.zeros((num_anchors, 4)) + anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T + areas = anchors[:, 2] * anchors[:, 3] + anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) + anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) + anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T + anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T + + return anchors + + +def compute_shape(image_shape, pyramid_levels): + image_shape = np.array(image_shape[:2]) + image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels] + return image_shapes + + +def shift(shape, stride, anchors): + shift_x = (np.arange(0, shape[1]) + 0.5) * stride + shift_y = (np.arange(0, shape[0]) + 0.5) * stride + shift_x, shift_y = np.meshgrid(shift_x, shift_y) + shifts = np.vstack(( + shift_x.ravel(), shift_y.ravel(), + shift_x.ravel(), shift_y.ravel() + )).transpose() + + A = anchors.shape[0] + K = shifts.shape[0] + all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))) + all_anchors = all_anchors.reshape((K * A, 4)) + + return all_anchors + + +def colors(label): + + if isinstance(label, str): + label = int(label) + + box_colors = [(39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), (14, 89, 122), + (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, 132), (169, 158, 
85), (188, 185, 26), (103, 1, 17), + (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), + (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), + (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), + (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), + (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), + (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), + (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), + (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), + (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), + (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), + (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), + (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), + (2, 20, 184), (122, 37, 185)] + + return box_colors[label] + + + + + diff --git a/pyvision/detection/efficientdet/model.py b/pyvision/detection/efficientdet/model.py new file mode 100644 index 0000000..80f742f --- /dev/null +++ b/pyvision/detection/efficientdet/model.py @@ -0,0 +1,195 @@ +import os +import numpy as np +import shutil +import cv2 +from PIL import Image +import sys +import time +import yaml +import gdown + +import torch +import torch.nn as nn +from torchvision import transforms + +from .lib.model import EfficientDet +from .lib.utils import colors + +__PREFIX__ = os.path.dirname(os.path.realpath(__file__)) + +sys.path.append(__PREFIX__) + +import yaml +import json +import re + + + +class EffdetInferAPI(object): + + def __init__(self, dataset='coco', thresh=0.4, gpu=False, common_size=512, verbose=False, wtspath="weights/", model_path=None): + + self.model_path = model_path + self.verbose = verbose + self.common_size = common_size + self.thresh = thresh + + self.mean = np.array([[[0.485, 0.456, 0.406]]]) + self.std = np.array([[[0.229, 0.224, 0.225]]]) + + with open(__PREFIX__ + f"/config/dataset_{dataset}.yaml", "r") as f: + config_file = yaml.safe_load(f) + + self.class_list = config_file["class_list"] + self.model_name = config_file['model_name'] + + + if gpu and not torch.cuda.is_available(): + raise ValueError(f"gpu not available but found gpu={gpu}") + self.device = "cuda" if gpu else "cpu" + self.gpu = gpu + + + #Instantiate the model + self.model = EfficientDet( + model_coeff = 0, + num_classes = len(self.class_list), + device = self.device + ) + + wtspath = wtspath+"{}.pth".format(self.model_name) + resp = self._check_or_download_weights(__PREFIX__+"/"+wtspath) + if resp == 0: + print("weights downloaded.") + else: + print("weights found.") + + if self.model_path is None: + self.model_path = __PREFIX__+"/"+wtspath + self.model.load_state_dict(torch.load(self.model_path)) + + self.model = self.model.to(self.device) + + + def _check_or_download_weights(self, wtspath): + + if os.path.join(__PREFIX__, "weights") not in wtspath and not os.path.exists(wtspath): + raise FileNotFoundError("File not found. 
Either file doesnt exist or directory provided") + elif not os.path.exists(wtspath): + + if os.path.exists(__PREFIX__+"/weights/") and len(os.listdir(__PREFIX__+"/weights/")) == 0: + os.rmdir(__PREFIX__+"/"+"weights/") + os.mkdir(__PREFIX__+"/weights/") + + if not os.path.exists(__PREFIX__+"/weights/"): + os.mkdir(__PREFIX__+"/weights/") + + with open(os.path.join(__PREFIX__, "config/weights_download.json")) as fp: + json_file = json.load(fp) + print("fetching file ids for {}".format(self.model_name)) + file_id = json_file[self.model_name] + + url = 'https://drive.google.com/uc?id={}'.format(file_id) + wtspath = __PREFIX__ + "/weights/{}.pth".format(self.model_name) + gdown.download(url, wtspath, quiet=False) + + self.wtspath = wtspath + + return 0 + else: + self.wtspath = wtspath + return 1 + + def detect(self, img): + + if isinstance(img, str): + if os.path.exists(img): + img_name = os.path.basename(img) + img = cv2.imread(img) + else: + raise FileNotFoundError("2",img) + elif isinstance(img, np.ndarray): + pass + elif isinstance(img, Image.Image): + img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) + + orig_img = img + + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + img = img.astype(np.float32) / 255.0 + img = (img.astype(np.float32) - self.mean) / self.std + height, width, _ = img.shape + + if height > width: + scale = self.common_size / height + resized_height = self.common_size + resized_width = int(width * scale) + else: + scale = self.common_size / width + resized_height = int(height * scale) + resized_width = self.common_size + + img = cv2.resize(img, (resized_width, resized_height)) + + new_img = np.zeros((self.common_size, self.common_size, 3)) + new_img[0:resized_height, 0:resized_width] = img + + img = torch.from_numpy(img) + + start_time = time.time() + with torch.no_grad(): + img = img.to(self.device) + scores, labels, boxes = self.model(img.permute(2, 0, 1).float().unsqueeze(dim=0)) + boxes /= scale + duration = time.time() - start_time + + scores = scores.cpu().numpy() + labels = labels.cpu().numpy() + boxes = boxes.cpu().numpy() + + #try: + + to_delete = [] + if boxes.shape[0] > 0: + + for boxid in range(boxes.shape[0]): + pred_probs = float(scores[boxid]) + #print(pred_probs) + if pred_probs < self.thresh: + #print(f"small prob: {pred_probs}") + to_delete.append(boxid) + continue + pred_labels = int(labels[boxid]) + xmin, ymin, xmax, ymax = boxes[boxid, :] + + color = colors(pred_labels) + cv2.rectangle(orig_img, (xmin, ymin), (xmax, ymax), color, 1) + #print("drawing") + put_text = self.class_list[pred_labels]+":%.2f"%pred_probs + text_size = cv2.getTextSize(self.class_list[pred_labels]+":%.2f"%pred_probs, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] + + # clipping text boxes to prevent out-of-frame boxes + text_x_max = xmin + text_size[0] + 3 if (xmin + text_size[0] + 3) < resized_width else resized_width + text_y_max = ymin + text_size[1] + 4 if (ymin + text_size[1] + 4) < resized_height else resized_height + + xmin = int(xmin) + ymin = int(ymin) + text_x_max = int(text_x_max) + text_y_max = int(text_y_max) + + cv2.rectangle(orig_img, (xmin, ymin), (text_x_max, text_y_max), color, -1) + cv2.putText( + orig_img, put_text, (xmin, ymin + text_size[1] + 4), + cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1 + ) + + + scores = np.delete(scores, to_delete) + labels = np.delete(labels, to_delete) + boxes = np.delete(boxes, to_delete) + + + labels = [self.class_list[label] for label in labels] + + return orig_img, duration, scores, labels, boxes + diff --git 
a/pyvision/detection/efficientdet/readme.md b/pyvision/detection/efficientdet/readme.md new file mode 100644 index 0000000..e02b8ef --- /dev/null +++ b/pyvision/detection/efficientdet/readme.md @@ -0,0 +1,20 @@ +# EfficientDet: Scalable and Efficient Object Detection + +A model zoo implementation of the EfficientDet algorithm. + +## Current Stat + +* Efficientdet-b0 trained on Dataset-v3 with a loss of 0.13 + +## Usage + +* To Train, from repo root, + +```shell +!python src/models/efficientdet/train.py +``` + +## To Do + +- [ ] Training b1 - b7 models. Experimenting with focal loss values. +- [ ] Train API diff --git a/pyvision/detection/efficientdet/train.py b/pyvision/detection/efficientdet/train.py new file mode 100644 index 0000000..de3cbd7 --- /dev/null +++ b/pyvision/detection/efficientdet/train.py @@ -0,0 +1,393 @@ +import os +import argparse +import time +from tqdm.auto import tqdm +import shutil +import numpy as np +import sys + +import torch.nn as nn +import torch +from torch.utils.data import DataLoader +from torchvision import transforms +from tensorboardX import SummaryWriter + +sys.path.append(os.path.basename(__file__)+"/lib") + +from lib.model import EfficientDet +from lib.dataset import CustomDataset, Resizer, Normalizer, Augmenter, collater + + +def parse_args(): + + parser = argparse.ArgumentParser(description="EfficientDet: Scalable and Efficient Object Detection training module") + + # General Parameters + parser.add_argument("--name", type=str, default="exp_0", help="Name of experiment") + + # Model parameters + parser.add_argument("--model_coeff", type=int, default=0, required=True, help="Efficientdet model coeff (b0, b1, ....)") + parser.add_argument("--image_size", type=int, default=512, help="The common height and width for all images") + parser.add_argument("--ckpt", type=str, help="path to checkpoint from where to resume training ") + + # Training parameters + parser.add_argument("--batch_size", type=int, default=8, help="Batch size for training") + parser.add_argument("--lr", type=float, default=1e-4, help="Initial Learning rate for training") + parser.add_argument("--gpu", type=bool, default=True, required=True, help="True if training is to use GPU. False if not.") + parser.add_argument("--alpha", type=float, default=0.25, help="Alpha parameter for focal loss") + parser.add_argument("--gamma", type=float, default=1.5, help="Gamma parameter for focal loss") + parser.add_argument("--epochs", type=int, default=100, help="Number of epochs to run training for") + parser.add_argument("--es_min_delta", type=float, default=0.0, help="Early Stopping's Parameter: minimum change in loss to qualify as improvement") + parser.add_argument("--es_patience", type=int, default=0, help="Early stopping's parameter: Number of epochs with no improvement in loss to stop training. 0 to disable") + + # Logging parameters + parser.add_argument("--log_path", type=str, default="tensorboard/", help="Path to store tensorboard logs") + parser.add_argument("--save_path", type=str, default="trained/", help="path to folder where to save trained model") + parser.add_argument("--best_epoch", type=int, default=0) + parser.add_argument("--best_loss", type=float, default=1e5) + + # Train Dataset parameters + + # Format of Dataset: + # - Root Directory + # - Annotations (COCO Format) + # - train_instance.json + # - test_instance.json + # - val_instance.json + # - train + # - img1 + # - img2 + # . + # . + # - imgn + # - test + # - img1 + # - img2 + # . + # . 
+ # - imgn + # - val + # - img1 + # - img2 + # . + # . + # - imgn + + parser.add_argument("--root_dir", type=str, required=True, help="Path to root dataset directory") + parser.add_argument("--coco_dir", type=str, default="./", required=True) + parser.add_argument("--img_dir", type=str, required=True, help="Name of the folder containing the imgs in the root dir") + parser.add_argument("--set_dir", type=str, required=True, help="name of set (train/test/val) being used for this") + parser.add_argument("--num_threads", type=int, default=2, help="Number of threads to utilize for loading data") + + # Validation parameters + parser.add_argument("--val", type=bool, default=False, help="Perform validation boolean") + parser.add_argument("--val_interval", type=int, default=5, help="Epochs interval after which to run validation") + parser.add_argument("--val_dir", type=str, help="Path to Validation set root directory") + parser.add_argument("--val_imgs", type=str, help="Path to Val set imgs") + parser.add_argument("--val_coco", type=str) + parser.add_argument("--val_set", type=str, help="Path to set dir") + + args = parser.parse_args() + + return args + +def Train(args): + + if args.gpu and not torch.cuda.is_available(): + raise ValueError(f"--gpu is {args.gpu} but cuda not found") + + if args.gpu: + device = "cuda" + else: + device = "cpu" + + # setting the trainloader + trainset = CustomDataset( + root_dir = args.root_dir + "/" + args.coco_dir, + img_dir = args.img_dir, + set_name = args.set_dir, + transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()]) + ) + trainloader = DataLoader( + trainset, + batch_size = args.batch_size, + shuffle = False, + drop_last = False, + collate_fn = collater, + num_workers = args.num_threads + ) + + # If validation is enabled, set the val loader + if args.val: + + valset = CustomDataset( + root_dir = args.val_dir + "/" + args.val_coco, + img_dir = args.val_imgs, + set_name = args.val_set, + transform = transforms.Compose([Normalizer(), Resizer()]) + ) + + valloader = DataLoader( + valset, + batch_size=args.batch_size, + shuffle=False, + drop_last=False, + collate_fn=collater, + num_workers=args.num_threads + ) + + # setting the device and other model params + + num_classes = trainset.num_classes() + efficientdet = EfficientDet( + model_coeff = args.model_coeff, + num_classes=num_classes, + focal_alpha = args.alpha, + focal_gamma = args.gamma, + device = device + ) + + # loading pretrained models (if passed) + try: + efficientdet.load_state_dict(torch.load(args.ckpt)) + print("checkpoint loaded successfully!") + except Exception as e: + print("ERROR: Model Loading failed: ", e) + + + efficientdet = efficientdet.to(device) + efficientdet.train() + + # Setting the optimizer and scheduler + optimizer = torch.optim.Adam(efficientdet.parameters(), args.lr) + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) + + # set up logging and model save directories + args.log_path = args.log_path + "/" + "EfficientDet" + "/" + args.name + if os.path.isdir(args.log_path): + shutil.rmtree(args.log_path) + os.makedirs(args.log_path) + + if os.path.isdir(args.save_path): + shutil.rmtree(args.save_path) + os.makedirs(args.save_path) + + # setting up the tensorboard writer + writer = SummaryWriter(args.log_path) + + len_trainloader = len(trainloader) + + if args.val: + + for epoch in range(args.epochs): + + efficientdet.train() + + epoch_loss = [] + epoch_progress = tqdm(trainloader) + for idx, data in enumerate(epoch_progress): + 
try: + + # zero grading the optimizer + optimizer.zero_grad() + + # forward pass + + img_batch = data['img'].to(device).float() + annot_batch = data['annot'].to(device) + + cls_loss, reg_loss = efficientdet([img_batch, annot_batch]) + + # Optimization block + + cls_loss = cls_loss.mean() + reg_loss = reg_loss.mean() + + total_loss = cls_loss + reg_loss + if total_loss == 0: + continue + + total_loss.backward() + + torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1) + + optimizer.step() + + epoch_loss.append(float(total_loss)) + total_mean_loss = np.mean(epoch_loss) + + epoch_progress.set_description( + "Epoch: {}/{}, Batch id: {}/{}, Classification Loss: {:.5f}, Regression Loss: {:.5f}, Batch Loss: {:.5f}, Total Loss: {:.5f}".format( + epoch+1, args.epochs, idx, len_trainloader, cls_loss, reg_loss, total_loss, total_mean_loss + ) + ) + + writer.add_scalar('Train/Total_Loss', total_mean_loss, epoch * len_trainloader + idx) + writer.add_scalar('Train/Regression_Loss', reg_loss, epoch * len_trainloader + idx) + writer.add_scalar('Train/Classification_loss (Focal Loss)', cls_loss, epoch * len_trainloader + idx) + + except Exception as e: + print(e) + continue + + scheduler.step(np.mean(epoch_loss)) + + if epoch % args.val_interval == 0: + + efficientdet.eval() + loss_reg_ls = [] + loss_cls_ls = [] + + for idx, data in enumerate(valloader): + + img_batch = data['img'].to(device).float() + annot_batch = data['annot'].to(device) + + with torch.no_grad(): + + cls_loss, reg_loss = efficientdet([img_batch, annot_batch]) + + cls_loss = cls_loss.mean() + reg_loss = reg_loss.mean() + + loss_cls_ls.append(float(cls_loss)) + loss_reg_ls.append(float(reg_loss)) + + cls_loss = np.mean(loss_cls_ls) + reg_loss = np.mean(loss_reg_ls) + loss = cls_loss + reg_loss + + print( + 'Epoch: {}/{}, Classification Loss: {:1.5f}, Regression Loss: {:1.5f}, Total Loss: {:1.5f}'.format( + epoch+1, args.epochs, cls_loss, reg_loss, np.mean(loss) + ) + ) + + + + writer.add_scalar('Val/Total_Loss', loss, epoch) + writer.add_scalar('Val/Regression_Loss', reg_loss, epoch) + writer.add_scalar('Val/Classification_Loss', cls_loss, epoch) + + if loss + args.es_min_delta < args.best_loss: + + args.best_loss = loss + args.best_epoch = epoch + torch.save(efficientdet, os.path.join(args.save_path, "efficientdet_best.pth")) + + dummy = torch.rand(1, 3, 512, 512) + dummy = dummy.to(device) + + if isinstance(efficientdet, nn.DataParallel): + + efficientdet.backbone_net.model.set_swish(memory_efficient=False) + + try: + torch.onnx.export( + efficientdet.module, dummy, os.path.join(args.save_path, "efficientdet_best.onnx"), + verbose=False, opset_version=11 + ) + except: + print("Failed ONNX export") + + else: + + efficientdet.backbone_net.model.set_swish(memory_efficient=False) + torch.onnx.export( + efficientdet, dummy, os.path.join(args.save_path, "efficientdet_best.onnx"), + verbose=False, opset_version=11 + ) + efficientdet.backbone_net.model.set_swish(memory_efficient=True) + + if epoch - args.best_epoch > args.es_patience > 0: + print(f"Stopped training at epoch: {epoch}, Lowerst loss: {loss}") + break + + else: + + for epoch in range(args.epochs): + + efficientdet.train() + + epoch_loss = [] + epoch_progress = tqdm(trainloader) + for idx, data in enumerate(epoch_progress): + try: + + # zero grading the optimizer + optimizer.zero_grad() + + # forward pass + + img_batch = data['img'].to(device).float() + annot_batch = data['annot'].to(device) + + cls_loss, reg_loss = efficientdet([img_batch, annot_batch]) + + # Optimization 
block + + cls_loss = cls_loss.mean() + reg_loss = reg_loss.mean() + + total_loss = cls_loss + reg_loss + if total_loss == 0: + continue + + total_loss.backward() + + torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1) + + optimizer.step() + + epoch_loss.append(float(total_loss)) + total_mean_loss = np.mean(epoch_loss) + + epoch_progress.set_description( + "Epoch: {}/{}, Batch id: {}/{}, Classification Loss: {:.5f}, Regression Loss: {:.5f}, Batch Loss: {:.5f}, Total Loss: {:.5f}".format( + epoch+1, args.epochs, idx, len_trainloader, cls_loss, reg_loss, total_loss, total_mean_loss + ) + ) + + writer.add_scalar('Train/Total_Loss', total_mean_loss, epoch * len_trainloader + idx) + writer.add_scalar('Train/Regression_Loss', reg_loss, epoch * len_trainloader + idx) + writer.add_scalar('Train/Classification_loss (Focal Loss)', cls_loss, epoch * len_trainloader + idx) + + except Exception as e: + print(e) + continue + + scheduler.step(np.mean(epoch_loss)) + + torch.save(efficientdet, os.path.join(args.save_path, "efficientdet_best.pth")) + + dummy = torch.rand(1, 3, 512, 512) + dummy = dummy.to(device) + if isinstance(efficientdet, nn.DataParallel): + + efficientdet.backbone_net.model.set_swish(memory_efficient=False) + + try: + torch.onnx.export( + efficientdet.module, dummy, os.path.join(args.save_path, "efficientdet_best.onnx"), + verbose=False, opset_version=11 + ) + except: + print("Failed ONNX export") + + else: + + efficientdet.backbone_net.model.set_swish(memory_efficient=False) + torch.onnx.export( + efficientdet, dummy, os.path.join(args.save_path, "efficientdet_best.onnx"), + verbose=False, opset_version=11 + ) + efficientdet.backbone_net.model.set_swish(memory_efficient=True) + + + writer.close() + + + +if __name__ == "__main__": + opts = parse_args() + Train(opts) \ No newline at end of file diff --git a/tests/detection/effdet/2.jpg b/tests/detection/effdet/2.jpg new file mode 100644 index 0000000..6bbb7c1 Binary files /dev/null and b/tests/detection/effdet/2.jpg differ diff --git a/tests/detection/effdet/3.jpg b/tests/detection/effdet/3.jpg new file mode 100644 index 0000000..058416c Binary files /dev/null and b/tests/detection/effdet/3.jpg differ diff --git a/tests/detection/effdet/__init__.py b/tests/detection/effdet/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/detection/effdet/test_effdet.py b/tests/detection/effdet/test_effdet.py new file mode 100644 index 0000000..1f3d4d1 --- /dev/null +++ b/tests/detection/effdet/test_effdet.py @@ -0,0 +1,17 @@ +import cv2 +from PIL import Image +from pyvision.detection import efficientdet + +model = efficientdet.EfficientDet("coco", thresh=0.95) + +img1 = cv2.imread("tests/detection/effdet/2.jpg") +img2 = cv2.imread("tests/detection/effdet/3.jpg") + +imgs = [img1, img2] + +for img in imgs: + img = cv2.resize(img, (416, 416)) + res = model.detect(img) + cv2.imshow("Frame", res[0]) + if cv2.waitKey() == ord('q'): + continue \ No newline at end of file
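
A quick usage sketch of the inference API this diff adds (`EffdetInferAPI`, exported as `EfficientDet`). The image path and output filename are placeholders; everything else follows the constructor and `detect()` signature in `pyvision/detection/efficientdet/model.py`:

```python
import cv2
from pyvision.detection import efficientdet

# COCO-trained EfficientDet-b0; the first call downloads weights into the package's weights/ folder.
model = efficientdet.EfficientDet(dataset="coco", thresh=0.4, gpu=False)

# "street.jpg" is a placeholder path -- detect() also accepts numpy arrays and PIL images.
drawn, duration, scores, labels, boxes = model.detect("street.jpg")

print(f"{len(labels)} detections in {duration:.3f}s")
for name, score in zip(labels, scores):
    print(name, f"{float(score):.2f}")

cv2.imwrite("street_detections.jpg", drawn)  # image with boxes and labels drawn on it
```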
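
`lib/utils.py` places 3 ratios x 3 scales = 9 anchors at every cell of pyramid levels P3-P7. For the default 512x512 input that works out to 49,104 anchors per image, which is the length of the second dimension the regressor and classifier heads produce. A small arithmetic check, using the same rounding as `Anchors.forward`:

```python
pyramid_levels = [3, 4, 5, 6, 7]
strides = [2 ** x for x in pyramid_levels]
num_anchors_per_cell = 3 * 3            # 3 aspect ratios x 3 scales, as in generate_anchors
image_size = 512                        # the default common_size

total = 0
for stride in strides:
    cells = (image_size + stride - 1) // stride   # same ceiling division as Anchors.forward
    total += cells * cells * num_anchors_per_cell
    print(f"stride {stride:3d}: {cells}x{cells} cells -> {cells * cells * num_anchors_per_cell} anchors")

print("total anchors:", total)  # 49104 for a 512x512 input
```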
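
`FocalLoss.forward` builds regression targets as anchor-relative deltas normalized by `[0.1, 0.1, 0.2, 0.2]`, and `BBoxTransform` in `lib/utils.py` applies the inverse mapping at inference time. A minimal round-trip sketch of the two directions, using made-up box coordinates:

```python
import torch

std = torch.tensor([0.1, 0.1, 0.2, 0.2])

def encode(anchor, gt):
    """Anchor-relative deltas as built in FocalLoss.forward (boxes are [x1, y1, x2, y2])."""
    aw, ah = anchor[2] - anchor[0], anchor[3] - anchor[1]
    ax, ay = anchor[0] + 0.5 * aw, anchor[1] + 0.5 * ah
    gw, gh = gt[2] - gt[0], gt[3] - gt[1]
    gx, gy = gt[0] + 0.5 * gw, gt[1] + 0.5 * gh
    deltas = torch.stack([(gx - ax) / aw, (gy - ay) / ah, torch.log(gw / aw), torch.log(gh / ah)])
    return deltas / std

def decode(anchor, deltas):
    """Inverse mapping, as applied by utils.BBoxTransform at inference time."""
    deltas = deltas * std
    aw, ah = anchor[2] - anchor[0], anchor[3] - anchor[1]
    ax, ay = anchor[0] + 0.5 * aw, anchor[1] + 0.5 * ah
    px, py = ax + deltas[0] * aw, ay + deltas[1] * ah
    pw, ph = torch.exp(deltas[2]) * aw, torch.exp(deltas[3]) * ah
    return torch.stack([px - 0.5 * pw, py - 0.5 * ph, px + 0.5 * pw, py + 0.5 * ph])

anchor = torch.tensor([10.0, 10.0, 50.0, 50.0])
gt = torch.tensor([12.0, 8.0, 60.0, 44.0])
print(decode(anchor, encode(anchor, gt)))  # recovers gt up to float error
```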
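
The classification head is trained with alpha-balanced focal loss. The helper below is a minimal standalone restatement of the per-element weighting used in `lib/losses.py`; it deliberately leaves out anchor matching and the ignored band of anchors with IoU between 0.4 and 0.5, which the full loss handles with the -1 targets:

```python
import torch

def focal_loss_elementwise(probs, targets, alpha=0.25, gamma=2.0):
    """Per-element alpha-balanced focal loss, mirroring the weighting in lib/losses.py.

    probs   -- sigmoid outputs in (0, 1)
    targets -- 0/1 tensor of the same shape (ignored anchors are handled upstream)
    """
    probs = torch.clamp(probs, 1e-4, 1.0 - 1e-4)
    alpha_factor = torch.where(targets == 1.0,
                               torch.full_like(probs, alpha),
                               torch.full_like(probs, 1.0 - alpha))
    focal_weight = torch.where(targets == 1.0, 1.0 - probs, probs)
    focal_weight = alpha_factor * focal_weight.pow(gamma)
    bce = -(targets * torch.log(probs) + (1.0 - targets) * torch.log(1.0 - probs))
    return focal_weight * bce

# Toy check: a confident correct prediction is down-weighted, a confident mistake is not.
probs = torch.tensor([0.95, 0.95])
targets = torch.tensor([1.0, 0.0])
print(focal_loss_elementwise(probs, targets))  # first value is orders of magnitude smaller
```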
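
`collater()` in `lib/dataset.py` pads each image's `[x1, y1, x2, y2, class]` annotation array to the batch maximum with rows of -1, and the loss later drops rows whose class column is -1, so images with different numbers of boxes (including none) batch cleanly. A minimal sketch of that padding step with made-up boxes:

```python
import torch

def pad_annotations(annots):
    """Pad per-image annotation tensors to a common length with -1 rows, as collater() does."""
    max_n = max(a.shape[0] for a in annots) or 1
    out = torch.full((len(annots), max_n, 5), -1.0)
    for i, a in enumerate(annots):
        if a.shape[0] > 0:
            out[i, : a.shape[0]] = a
    return out

a = torch.tensor([[0.0, 0.0, 10.0, 10.0, 3.0]])   # one box of class 3
b = torch.zeros((0, 5))                           # image with no boxes
padded = pad_annotations([a, b])
print(padded.shape)                               # torch.Size([2, 1, 5])
print((padded[1, :, 4] != -1).sum().item())       # 0 valid boxes for the second image
```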
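
Each BiFPN node in `lib/model.py` fuses its inputs with learnable non-negative weights normalized by their sum plus a small epsilon (fast normalized fusion), then applies a depthwise-separable conv, the same pattern as the `conv*_up`/`conv*_down` blocks. A reduced sketch of a single top-down node with illustrative channel counts and feature-map sizes:

```python
import torch
import torch.nn as nn

class FuseTwo(nn.Module):
    """One top-down BiFPN node: fast normalized fusion of a lateral input and an upsampled coarser input."""

    def __init__(self, channels, eps=1e-4):
        super().__init__()
        self.w = nn.Parameter(torch.ones(2))   # learnable fusion weights, kept non-negative via ReLU
        self.relu = nn.ReLU()
        self.eps = eps
        self.conv = nn.Sequential(
            nn.Conv2d(channels, channels, 3, padding=1, groups=channels),  # depthwise
            nn.Conv2d(channels, channels, 1),                              # pointwise
            nn.BatchNorm2d(channels, momentum=0.9997, eps=4e-5),
            nn.ReLU(),
        )
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

    def forward(self, lateral, coarser):
        w = self.relu(self.w)
        w = w / (w.sum() + self.eps)
        return self.conv(w[0] * lateral + w[1] * self.upsample(coarser))

# 64-channel features at strides 16 and 32 for a 512x512 input (shapes are illustrative).
p5, p6 = torch.randn(1, 64, 32, 32), torch.randn(1, 64, 16, 16)
print(FuseTwo(64)(p5, p6).shape)  # torch.Size([1, 64, 32, 32])
```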