Test run #58

Open · wants to merge 4 commits into main
159 changes: 159 additions & 0 deletions .ipynb_checkpoints/engine-checkpoint.py
@@ -0,0 +1,159 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Train and eval functions used in main.py
Mostly copy-paste from DETR (https://github.com/facebookresearch/detr).
"""
import math
import os
import sys
from typing import Iterable

import torch

import util.misc as utils
import numpy as np
import torchvision.transforms as standard_transforms
import cv2

class DeNormalize(object):
    """Undo ImageNet-style normalization so tensors can be visualized as images."""
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor

def vis(samples, targets, pred, vis_dir, des=None):
    '''
    samples -> tensor: [batch, 3, H, W]
    targets -> list of dict: [{'point': tensor [num_gt, 2], 'image_id': tensor}]
    pred -> list of per-image predictions: [[num_preds, 2], ...]
    '''
    gts = [t['point'].tolist() for t in targets]

    pil_to_tensor = standard_transforms.ToTensor()

    restore_transform = standard_transforms.Compose([
        DeNormalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        standard_transforms.ToPILImage()
    ])
    # draw the images one by one
    for idx in range(samples.shape[0]):
        sample = restore_transform(samples[idx])
        sample = pil_to_tensor(sample.convert('RGB')).numpy() * 255
        # CHW -> HWC and RGB -> BGR for OpenCV
        sample_gt = sample.transpose([1, 2, 0])[:, :, ::-1].astype(np.uint8).copy()
        sample_pred = sample.transpose([1, 2, 0])[:, :, ::-1].astype(np.uint8).copy()

        size = 2
        # draw ground-truth points in green
        for t in gts[idx]:
            sample_gt = cv2.circle(sample_gt, (int(t[0]), int(t[1])), size, (0, 255, 0), -1)
        # draw predicted points in red
        for p in pred[idx]:
            sample_pred = cv2.circle(sample_pred, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)

        name = targets[idx]['image_id']
        # save the visualized images
        if des is not None:
            cv2.imwrite(os.path.join(vis_dir, '{}_{}_gt_{}_pred_{}_gt.jpg'.format(int(name),
                        des, len(gts[idx]), len(pred[idx]))), sample_gt)
            cv2.imwrite(os.path.join(vis_dir, '{}_{}_gt_{}_pred_{}_pred.jpg'.format(int(name),
                        des, len(gts[idx]), len(pred[idx]))), sample_pred)
        else:
            cv2.imwrite(
                os.path.join(vis_dir, '{}_gt_{}_pred_{}_gt.jpg'.format(int(name), len(gts[idx]), len(pred[idx]))),
                sample_gt)
            cv2.imwrite(
                os.path.join(vis_dir, '{}_gt_{}_pred_{}_pred.jpg'.format(int(name), len(gts[idx]), len(pred[idx]))),
                sample_pred)

# the training routine
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    # iterate over all training samples
    for samples, targets in data_loader:
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # forward pass
        outputs = model(samples)
        # compute the losses
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

        # reduce all losses across processes for logging
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items() if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())

        loss_value = losses_reduced_scaled.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        # backward pass
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()
        # update the logger
        metric_logger.update(loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}

# the inference routine
@torch.no_grad()
def evaluate_crowd_no_overlap(model, data_loader, device, vis_dir=None):
    model.eval()

    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    # run inference on all images to compute MAE and MSE
    maes = []
    mses = []
    for samples, targets in data_loader:
        # note: this routine assumes a batch size of 1
        samples = samples.to(device)

        outputs = model(samples)
        outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]

        outputs_points = outputs['pred_points'][0]

        gt_cnt = targets[0]['point'].shape[0]
        # a confidence threshold of 0.5 is used by default
        threshold = 0.5

        points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
        predict_cnt = int((outputs_scores > threshold).sum())
        # if specified, save the visualized images
        if vis_dir is not None:
            vis(samples, targets, [points], vis_dir)
        # accumulate per-image absolute and squared errors
        mae = abs(predict_cnt - gt_cnt)
        mse = (predict_cnt - gt_cnt) * (predict_cnt - gt_cnt)
        maes.append(float(mae))
        mses.append(float(mse))
    # compute MAE and RMSE over the whole set
    mae = np.mean(maes)
    mse = np.sqrt(np.mean(mses))

    return mae, mse
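For context, here is a minimal sketch of how these two routines are typically wired together by a training script. It is illustrative only: the Adam optimizer, learning rate, and evaluation cadence are assumptions and not part of this diff; only `train_one_epoch` and `evaluate_crowd_no_overlap` come from the file above.

# Illustrative driver for the routines above; optimizer choice and
# hyperparameters are assumptions, not part of this PR.
import torch
from engine import train_one_epoch, evaluate_crowd_no_overlap

def train_loop_sketch(model, criterion, train_loader, val_loader, device,
                      epochs=100, lr=1e-4, max_norm=0.1, eval_every=5):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        # one pass over the training set; returns averaged logger stats
        stats = train_one_epoch(model, criterion, train_loader,
                                optimizer, device, epoch, max_norm)
        print('epoch {}: train loss {:.4f}'.format(epoch, stats['loss']))
        # periodically evaluate counting error on the validation set
        if (epoch + 1) % eval_every == 0:
            mae, mse = evaluate_crowd_no_overlap(model, val_loader, device)
            print('epoch {}: MAE {:.2f}, RMSE {:.2f}'.format(epoch, mae, mse))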
102 changes: 102 additions & 0 deletions .ipynb_checkpoints/run_test-checkpoint.py
@@ -0,0 +1,102 @@
import argparse

import torch
import torchvision.transforms as standard_transforms
import numpy as np

from PIL import Image
import cv2
from engine import *
from models import build_model
import os
import warnings
warnings.filterwarnings('ignore')

def get_args_parser():
    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)

    # * Backbone
    parser.add_argument('--backbone', default='vgg16', type=str,
                        help="name of the convolutional backbone to use")

    parser.add_argument('--row', default=2, type=int,
                        help="row number of anchor points")
    parser.add_argument('--line', default=2, type=int,
                        help="line number of anchor points")

    parser.add_argument('--output_dir', default='',
                        help='path where to save the outputs')  # output path
    parser.add_argument('--weight_path', default='',
                        help='path where the trained weights are saved')  # weights path

    parser.add_argument('--gpu_id', default=0, type=int, help='the gpu used for evaluation')

    return parser

def main(args, debug=False):

    os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)

    print(args)
    device = torch.device('cuda')
    # get the P2PNet
    model = build_model(args)
    # move to GPU
    model.to(device)
    # load the trained weights
    if args.weight_path:
        checkpoint = torch.load(args.weight_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
    # convert to eval mode
    model.eval()
    # create the pre-processing transform
    transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # set your image path here
    img_path = "./vis/demo1.jpg"  # test image
    # load the image
    img_raw = Image.open(img_path).convert('RGB')
    # round the size down to a multiple of 128
    width, height = img_raw.size
    new_width = width // 128 * 128
    new_height = height // 128 * 128
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is its replacement
    img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
    # pre-processing
    img = transform(img_raw)

    samples = img.unsqueeze(0)
    samples = samples.to(device)
    # run inference
    outputs = model(samples)
    outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]

    outputs_points = outputs['pred_points'][0]

    threshold = 0.5
    # filter the predictions by confidence
    points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
    predict_cnt = int((outputs_scores > threshold).sum())

    # draw the predictions
    size = 2
    img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
    for p in points:
        img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
    # save the visualized image
    cv2.imwrite(os.path.join(args.output_dir, 'pred{}.jpg'.format(predict_cnt)), img_to_draw)

if __name__ == '__main__':
    parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
    args = parser.parse_args()
    main(args)
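As a usage note, the script can be invoked from the command line or driven programmatically; the sketch below shows both, with all paths being placeholders rather than files shipped in this PR.

# Hypothetical command-line invocation (paths are placeholders):
#   python run_test.py --weight_path ./weights/best_mae.pth --output_dir ./logs/
#
# The same entry point can also be exercised programmatically, e.g. from a notebook:
import argparse
from run_test import get_args_parser, main

parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
args = parser.parse_args(['--weight_path', './weights/best_mae.pth',
                          '--output_dir', './logs/'])
main(args)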