From 2aa1342abafb225875938bcc38860462ac2f7e47 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 13 May 2024 01:07:28 +0900 Subject: [PATCH 01/15] Added optimized code for padim --- anomaly_detection/padim/README.md | 14 ++ anomaly_detection/padim/padim.py | 150 ++++++++++++++--- anomaly_detection/padim/padim_utils.py | 221 ++++++++++++++++++++++++- 3 files changed, 355 insertions(+), 30 deletions(-) diff --git a/anomaly_detection/padim/README.md b/anomaly_detection/padim/README.md index f0836d9cf..87ad18373 100644 --- a/anomaly_detection/padim/README.md +++ b/anomaly_detection/padim/README.md @@ -104,6 +104,20 @@ By adding the `--aug` option, you can process with augmentation. $ python3 padim.py --aug ``` +By adding the `--enable_optimization` option, you can use optimized code, which significantly speeds up distance matrix calculation. You have to have cuda compatible GPU and pytorch installed +(default is processing without optimization) +```bash +$ python3 padim.py --enable_optimization True +``` + +By adding the `--compare_optimization` option, you can compare output of the optimized code with the output of the original code. You have to have cuda compatible GPU and pytorch installed +(default is processing without comparison) +```bash +$ python3 padim.py --compare_optimization True +``` + + + ## PaDiM GUI You can also use the GUI to train and test. diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index 646ba571f..dfea96968 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -80,8 +80,30 @@ '-an', '--aug_num', type=int, default=5, help='specify the amplification number of augmentation.' ) +parser.add_argument( + '-eon', '--enable_optimization', type=bool, default=False, + help='Flag to enable optimized code' +) +parser.add_argument( + '--compare_optimization', type=bool, default=False, + help='Flag to compare output of optimization with original code' +) args = update_parser(parser) +if args.compare_optimization: + args.enable_optimization = True + train_output_list=[] + +if args.enable_optimization: + import torch + if torch.cuda.is_available() : + device = torch.device("cuda") + + else: + device = torch.device("cpu") + logger.info("Torch device : " + str(device)) + + # ====================== # Main functions @@ -152,6 +174,18 @@ def plot_fig(file_list, test_imgs, scores, anormal_scores, gt_imgs, threshold, s fig_img.savefig(savepath_tmp, dpi=100) plt.close() +def infer_init_run(net, params, train_outputs, IMAGE_SIZE): + import numpy as np + dummy_image = np.random.rand(1, 3, 224, 224) * 255.0 # Scale between 0 and 255 + # Convert the dtype to float32 for efficiency + dummy_image = dummy_image.astype(np.float32) + logger.info(f"PaDiM initialization inference starts!") + if args.enable_optimization: + score = infer_optimized(net, params, train_outputs, dummy_image, IMAGE_SIZE, device) + else: + score = infer(net, params, train_outputs, dummy_image, IMAGE_SIZE) + logger.info(f"PaDiM initialization inference finish!") + def train_from_image_or_video(net, params): # training @@ -219,6 +253,7 @@ def infer_from_image(net, params, train_outputs, threshold, gt_imgs): test_imgs = [] score_map = [] + infer_init_run(net, params, train_outputs, IMAGE_SIZE) for i_img in range(0, len(args.input)): logger.info('from (%s) ' % (args.input[i_img])) @@ -228,20 +263,40 @@ def infer_from_image(net, params, train_outputs, threshold, gt_imgs): img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) test_imgs.append(img[0]) - + if args.benchmark: logger.info('BENCHMARK mode') total_time = 0 - for i in range(args.benchmark_count): - start = int(round(time.time() * 1000)) - dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) - end = int(round(time.time() * 1000)) - logger.info(f'\tailia processing time {end - start} ms') - if i != 0: - total_time = total_time + (end - start) - logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') + if args.enable_optimization: + for i in range(args.benchmark_count): + start = int(round(time.time() * 1000)) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device) + end = int(round(time.time() * 1000)) + logger.info(f'\tailia processing time {end - start} ms') + if i != 0: + total_time = total_time + (end - start) + logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') + else: + for i in range(args.benchmark_count): + start = int(round(time.time() * 1000)) + dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) + end = int(round(time.time() * 1000)) + logger.info(f'\tailia processing time {end - start} ms') + if i != 0: + total_time = total_time + (end - start) + logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') + if args.compare_optimization: + logger.info(f'\tResults of optimized and original code is the same: {np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device))}') + + else: - dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) + if args.enable_optimization: + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device) + else: + dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) + if args.compare_optimization: + logger.info('Results of optimized and original code is the same: '+ str(np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device)))) + score_map.append(dist_tmp) @@ -264,29 +319,55 @@ def infer_from_video(net, params, train_outputs, threshold): score_map = [] frame_shown = False - while(True): - ret, frame = capture.read() - if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret: - break - if frame_shown and cv2.getWindowProperty('frame', cv2.WND_PROP_VISIBLE) == 0: - break + infer_init_run(net, params, train_outputs, IMAGE_SIZE) + if args.enable_optimization: + while(True): + ret, frame = capture.read() + if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret: + break + if frame_shown and cv2.getWindowProperty('frame', cv2.WND_PROP_VISIBLE) == 0: + break - img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) + img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) - dist_tmp = infer(net, params, train_outputs, img) + dist_tmp = infer_optimized(net, params, train_outputs, img, device) - score_map.append(dist_tmp) - scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production + score_map.append(dist_tmp) + scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production - heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) - frame = pack_visualize(heat_map, mask, vis_img, scores, IMAGE_SIZE) + heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) + frame = pack_visualize(heat_map, mask, vis_img, scores, IMAGE_SIZE) + + cv2.imshow('frame', frame) + frame_shown = True + + if writer is not None: + writer.write(frame) + else: + while(True): + ret, frame = capture.read() + if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret: + break + if frame_shown and cv2.getWindowProperty('frame', cv2.WND_PROP_VISIBLE) == 0: + break - cv2.imshow('frame', frame) - frame_shown = True + img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) - if writer is not None: - writer.write(frame) + dist_tmp = infer(net, params, train_outputs, img) + + score_map.append(dist_tmp) + scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production + + heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) + frame = pack_visualize(heat_map, mask, vis_img, scores, IMAGE_SIZE) + + cv2.imshow('frame', frame) + frame_shown = True + + if writer is not None: + writer.write(frame) capture.release() cv2.destroyAllWindows() @@ -299,9 +380,24 @@ def train_and_infer(net, params): logger.info('loading train set feature from: %s' % args.feat) with open(args.feat, 'rb') as f: train_outputs = pickle.load(f) + if args.compare_optimization: + train_output_list.append(train_outputs) + if args.enable_optimization: + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + if args.compare_optimization: + train_output_list.append(train_outputs) + logger.info('loaded.') else: train_outputs = train_from_image_or_video(net, params) + if args.compare_optimization: + train_output_list.append(train_outputs) + if args.enable_optimization: + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + if args.compare_optimization: + train_output_list.append(train_outputs) if args.threshold is None: if args.video: diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index 1586a77d4..aa4e2aa52 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -16,6 +16,9 @@ from skimage import morphology from skimage.segmentation import mark_boundaries +from padim import args +if args.enable_optimization: + import torch WEIGHT_RESNET18_PATH = 'resnet18.onnx' MODEL_RESNET18_PATH = 'resnet18.onnx.prototxt' @@ -56,6 +59,48 @@ def embedding_concat(x, y): return a +def embedding_concat_optimizded(x, y, device): + B, C1, H1, W1 = x.shape + _, C2, H2, W2 = y.shape + + assert H1 == W1 + + s = H1 // H2 + # Chesboard pattern downscaling + sel = torch.from_numpy(np.asarray([ + np.array([i for i in range(i, H1, s)]) for i in range(s) + ])).to(device) + index3 = sel[torch.repeat_interleave(torch.arange(s, device=device), s)] + index4 = sel[torch.tile(torch.arange(s, device=device), (s,))] + #a = x[:, :, index3[:, None].T, index4.T].permute(0, 1, 4, 2, 3) + a = x[:, :, index3[:, None].permute(2, 1, 0), index4.permute(1, 0)].permute(0, 1, 4, 2, 3) + # Concatenation + z = torch.cat(( + a, + torch.tile(y[:, :, None, :, :], (1, 1, s * s, 1, 1)) + ), axis=1) + # Downsizing and reshaping + z = z.reshape(B, -1, s, s, H2, W2).permute(0, 1, 4, 2, 5, 3).reshape(B, -1, H1, W1) + return z + +def embedding_concat_numpy(x, y): + B, C1, H1, W1 = x.shape + _, C2, H2, W2 = y.shape + assert H1 == W1 + s = H1 // H2 + #Chesboard pattern downscaling + sel = np.asarray([np.array([i for i in range(i, H1, s)]) for i in range(s)]) + index3= sel[np.repeat(np.arange(s), s)] + index4= sel[np.tile(np.arange(s), s)] + a=x[:, :, index3[:, None].T, index4.T].transpose((0, 1, 4, 2, 3)) + #concatination + z=np.concatenate((a, np.tile((y[:, :, None, :, :]), (1, 1, s*s, 1, 1))), axis=1)#.reshape((B, -1, H2 , W2)) + _, C3, _, _, _ = z.shape #(1, 448, 16, 28, 28) + #downsizing and rescaling + z=z.reshape((B, C3, s, s, H2,W2)).transpose((0, 1, 4, 2, 5, 3)).reshape(B, C3, H1, W1) + + return z + def preprocess(img, size, crop_size, mask=False, keep_aspect = True): h, w = img.shape[:2] @@ -69,7 +114,7 @@ def preprocess(img, size, crop_size, mask=False, keep_aspect = True): else: size = (size, size) img = np.array(Image.fromarray(img).resize( - size, resample=Image.ANTIALIAS if not mask else Image.NEAREST)) + size, resample=Image.LANCZOS if not mask else Image.NEAREST)) # center crop h, w = img.shape[:2] @@ -139,11 +184,22 @@ def preprocess_aug(img, size, crop_size, mask=False, keep_aspect = True, angle_r return img -def postprocess(outputs): +def postprocess(outputs, training_c=False): + embedding_vectors = outputs['layer1'] + for layer_name in ['layer2', 'layer3']: + if training_c: + embedding_vectors = embedding_concat_numpy(embedding_vectors, outputs[layer_name]) + else: + embedding_vectors = embedding_concat(embedding_vectors, outputs[layer_name]) + + + return embedding_vectors + +def postprocess_optimized(outputs, device): # Embedding concat embedding_vectors = outputs['layer1'] for layer_name in ['layer2', 'layer3']: - embedding_vectors = embedding_concat(embedding_vectors, outputs[layer_name]) + embedding_vectors = embedding_concat_optimizded(embedding_vectors, outputs[layer_name], device) return embedding_vectors @@ -277,6 +333,114 @@ def training(net, params, size, crop_size, keep_aspect, batch_size, train_dir, a train_outputs = [mean, cov, cov_inv, idx] return train_outputs + +def training_optimized(net, params, size, crop_size, keep_aspect, batch_size, train_dir, aug, aug_num, seed, logger): + # set seed + random.seed(seed) + idx = random.sample(range(0, params["t_d"]), params["d"]) + + if os.path.isdir(train_dir): + train_imgs = sorted([ + os.path.join(train_dir, f) for f in os.listdir(train_dir) + if f.endswith('.png') or f.endswith('.jpg') or f.endswith('.bmp') + ]) + if len(train_imgs) == 0: + logger.error("train images not found in '%s'" % train_dir) + sys.exit(-1) + else: + logger.info("capture 200 frames from video") + train_imgs = capture_training_frames_from_video(train_dir) + + if not aug: + logger.info('extract train set features without augmentation') + aug_num = 1 + else: + logger.info('extract train set features with augmentation') + aug_num = aug_num + mean = None + N = 0 + for i_aug in range(aug_num): + for i_img in range(0, len(train_imgs), batch_size): + # prepare input data + imgs = [] + if not aug: + logger.info('from (%s ~ %s) ' % + (i_img, + min(len(train_imgs) - 1, + i_img + batch_size))) + else: + logger.info('from (%s ~ %s) on augmentation lap %d' % + (i_img, + min(len(train_imgs) - 1, + i_img + batch_size), i_aug)) + for image_path in train_imgs[i_img:i_img + batch_size]: + if type(image_path) is str: + img = load_image(image_path) + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) + else: + img = cv2.cvtColor(image_path, cv2.COLOR_BGR2RGB) + if not aug: + img = preprocess(img, size, crop_size, keep_aspect=keep_aspect) + else: + img = preprocess_aug(img, size, crop_size, keep_aspect=keep_aspect) + imgs.append(img) + + # countup N + N += len(imgs) + + imgs = np.vstack(imgs) + + logger.debug(f'input images shape: {imgs.shape}') + net.set_input_shape(imgs.shape) + + # inference + _ = net.predict(imgs) + + train_outputs = OrderedDict([ + ('layer1', []), ('layer2', []), ('layer3', []) + ]) + for key, name in zip(train_outputs.keys(), params["feat_names"]): + train_outputs[key].append(net.get_blob_data(name)) + for k, v in train_outputs.items(): + train_outputs[k] = v[0] + + embedding_vectors = postprocess(train_outputs, training_c=True) + + # randomly select d dimension + embedding_vectors = embedding_vectors[:, idx, :, :] + + # reshape 2d pixels to 1d features + B, C, H, W = embedding_vectors.shape + embedding_vectors = embedding_vectors.reshape(B, C, H * W) + + # initialize mean and covariance matrix + if (mean is None): + mean = np.zeros((C, H * W), dtype=np.float32) + cov = np.zeros((C, C, H * W), dtype=np.float32) + + # calculate multivariate Gaussian distribution + # (add up mean and covariance matrix) + mean += np.sum(embedding_vectors, axis=0) + for i in range(H * W): + # https://github.com/numpy/numpy/blob/v1.21.0/numpy/lib/function_base.py#L2324-L2543 + m = embedding_vectors[:, :, i] + m = m - (mean[:, [i]].T / N) + cov[:, :, i] += m.T @ m + + # devide mean by N + mean = mean / N + # devide covariance by N-1, and calculate inverse + I = np.identity(C) + for i in range(H * W): + cov[:, :, i] = (cov[:, :, i] / (N - 1)) + 0.01 * I + + cov_inv = np.zeros(cov.shape) + for i in range(H * W): + cov_inv[:, :, i] = np.linalg.inv(cov[:, :, i]) + + train_outputs = [mean, cov, cov_inv, idx] + return train_outputs + def infer(net, params, train_outputs, img, crop_size): # prepare input data imgs = [] @@ -325,6 +489,57 @@ def infer(net, params, train_outputs, img, crop_size): return dist_tmp +def infer_optimized(net, params, train_outputs, img, crop_size, device): + # prepare input data + imgs = [] + imgs.append(img) + imgs = np.vstack(imgs) + + # inference + net.set_input_shape(imgs.shape) + _ = net.predict(imgs) + + test_outputs = OrderedDict([ + ('layer1', []), ('layer2', []), ('layer3', []) + ]) + for key, name in zip(test_outputs.keys(), params["feat_names"]): + test_outputs[key].append(net.get_blob_data(name)) + for k, v in test_outputs.items(): + test_outputs[k] = torch.from_numpy(v[0]).to(device) + + embedding_vectors = postprocess_optimized(test_outputs, device) + + # randomly select d dimension + idx = train_outputs[3] + embedding_vectors = embedding_vectors[:, idx, :, :] + + # reshape 2d pixels to 1d features + B, C, H, W = embedding_vectors.shape + embedding_vectors = embedding_vectors.view(B, C, H * W) + + # calculate distance matrix + mean_vectors = train_outputs[0] + inv_cov_matrices = train_outputs[2] + samples = embedding_vectors[0] + # Step 1: Compute the difference between each sample and its corresponding mean + differences = samples - mean_vectors + # Step 2: Apply the inverse covariance matrix + transformed_differences = torch.einsum('ijk,jk->ik', inv_cov_matrices, differences) + # Step 3: Compute the Mahalanobis distance + dist_tmp = torch.sqrt(torch.sum(differences * transformed_differences, dim=0)) + + # upsample + dist_tmp = dist_tmp.view(1, -1).view( H, W).cpu().numpy() + dist_tmp = np.array(Image.fromarray(dist_tmp).resize( + (crop_size, crop_size), resample=Image.BILINEAR) + ) + + # apply gaussian smoothing on the score map + dist_tmp = gaussian_filter(dist_tmp, sigma=4) + + return dist_tmp + + def normalize_scores(score_map, crop_size, roi_img = None): N = len(score_map) score_map = np.vstack(score_map) From c4e2065cd363c2241cd34a824e5377ff36fe1ca9 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 13 May 2024 01:39:53 +0900 Subject: [PATCH 02/15] fixed a bug --- anomaly_detection/padim/padim.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index dfea96968..1b0090770 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -234,8 +234,11 @@ def decide_threshold_from_gt_image(net, params, train_outputs, gt_imgs): img = load_image(image_path) img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) + if args.enable_optimization: + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device) + else: + dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) - dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) score_map.append(dist_tmp) From af5b61710b362cd4df7dbc661b1a9fdbb6a81624 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Wed, 22 May 2024 17:52:36 +0900 Subject: [PATCH 03/15] fixed arg_utils.py running twice issue --- anomaly_detection/padim/padim.py | 5 +++-- anomaly_detection/padim/padim_utils.py | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index 1b0090770..68e6d2810 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -379,6 +379,7 @@ def infer_from_video(net, params, train_outputs, threshold): def train_and_infer(net, params): + timestart=time.time() if args.feat: logger.info('loading train set feature from: %s' % args.feat) with open(args.feat, 'rb') as f: @@ -387,7 +388,7 @@ def train_and_infer(net, params): train_output_list.append(train_outputs) if args.enable_optimization: train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], - torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] if args.compare_optimization: train_output_list.append(train_outputs) @@ -426,7 +427,7 @@ def train_and_infer(net, params): def main(): # model files check and download - weight_path, model_path, params = get_params(args.arch) + weight_path, model_path, params = get_params(args.arch) check_and_download_models(weight_path, model_path, REMOTE_PATH) # create net instance diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index aa4e2aa52..54319deb4 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -16,9 +16,7 @@ from skimage import morphology from skimage.segmentation import mark_boundaries -from padim import args -if args.enable_optimization: - import torch +import torch WEIGHT_RESNET18_PATH = 'resnet18.onnx' MODEL_RESNET18_PATH = 'resnet18.onnx.prototxt' From ad410b22f8ee3fccfb0be09e649f8cc3c9a92a7f Mon Sep 17 00:00:00 2001 From: YToleubay Date: Fri, 31 May 2024 12:45:46 +0900 Subject: [PATCH 04/15] feat: Add MPS support and file type selection; update GUI with new options - Added support for MPS backend for running on Apple devices. - Added file type selection support for .pt, .npy, and .pkl formats. - Updated GUI to include: - Enable optimization flag. - Benchmark mode. - File type selection. --- anomaly_detection/padim/README.md | 11 +- anomaly_detection/padim/padim.py | 198 +++++++++++++++++--- anomaly_detection/padim/padim_gui.py | 270 +++++++++++++++++++++++++-- 3 files changed, 430 insertions(+), 49 deletions(-) diff --git a/anomaly_detection/padim/README.md b/anomaly_detection/padim/README.md index 87ad18373..9c9ce4849 100644 --- a/anomaly_detection/padim/README.md +++ b/anomaly_detection/padim/README.md @@ -71,8 +71,12 @@ You can specify the directory of normal product files with the `--train_dir` opt $ python3 padim.py --train_dir train ``` -The feature vectors created from files in the train directory are saved to the pickle file. -From the second time, by specifying the pickle file by `--feat` option, +The feature vectors created from files in the train directory can be saved to the pickle, numpy or pickle file. For that you have to specify desired format as `npy`, `pt` or `pkl`. Depending on --enable_optimization flag, files saved might be originally pytorch tensors (True) or numpy arrays (False). +```bash +$ python3 padim.py --save_format pt +``` + +From the second time, by specifying the saved file by `--feat` option, it can omit the calculation of the feature vector of the normal product. The name of the pickle file created is the same as the name of a normal product file directory. ```bash @@ -129,6 +133,7 @@ Start the GUI with the following command. ```bash $ python3 padim_gui.py ``` +Specify the inference mode from the `Set optimization` button and choose the `Trained file format`. Specify the folder from the `Select train folder` button and press the `Train button`. @@ -137,6 +142,8 @@ Inference results are listed in Result images. Change the Threshold and press the `Test button` again +If you want to benchmark the inference, set `Benchmark mode` + ## Reference [PaDiM-Anomaly-Detection-Localization-master](https://github.com/xiahaifeng1995/PaDiM-Anomaly-Detection-Localization-master) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index 68e6d2810..a5e9bd713 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -49,8 +49,8 @@ help='arch model.' ) parser.add_argument( - '-f', '--feat', metavar="PICKLE_FILE", default=None, - help='train set feature pkl files.' + '-f', '--feat', metavar="FILE", default=None, + help='train set feature files.' ) parser.add_argument( '-bs', '--batch_size', default=32, @@ -88,6 +88,15 @@ '--compare_optimization', type=bool, default=False, help='Flag to compare output of optimization with original code' ) +parser.add_argument( + '--compare_optimization', type=bool, default=False, + help='Flag to compare output of optimization with original code' +) + +parser.add_argument( + '--save_format', metavar="FILE", default="pkl", + help='chose training file format pt, npy or pkl.' +) args = update_parser(parser) if args.compare_optimization: @@ -98,7 +107,9 @@ import torch if torch.cuda.is_available() : device = torch.device("cuda") - + + elif torch.backends.mps.is_available(): + device = torch.device("mps") else: device = torch.device("cpu") logger.info("Torch device : " + str(device)) @@ -189,18 +200,18 @@ def infer_init_run(net, params, train_outputs, IMAGE_SIZE): def train_from_image_or_video(net, params): # training - train_outputs = training(net, params, IMAGE_RESIZE, IMAGE_SIZE, KEEP_ASPECT, int(args.batch_size), args.train_dir, args.aug, args.aug_num, args.seed, logger) - + if args.enable_optimization: + train_outputs = training_optimized(net, params, IMAGE_RESIZE, IMAGE_SIZE, KEEP_ASPECT, int(args.batch_size), args.train_dir, args.aug, args.aug_num, args.seed, logger) + else: + train_outputs = training(net, params, IMAGE_RESIZE, IMAGE_SIZE, KEEP_ASPECT, int(args.batch_size), args.train_dir, args.aug, args.aug_num, args.seed, logger) # save learned distribution if args.feat: train_feat_file = args.feat else: train_dir = args.train_dir - train_feat_file = "%s.pkl" % os.path.basename(train_dir) - logger.info('saving train set feature to: %s ...' % train_feat_file) - with open(train_feat_file, 'wb') as f: - pickle.dump(train_outputs, f) - logger.info('saved.') + train_feat_file = str(os.path.basename(train_dir))+"."+str(args.save_format) + + train_outputs=_save_training_flie(train_feat_file, args.save_format, train_outputs) return train_outputs @@ -381,29 +392,12 @@ def infer_from_video(net, params, train_outputs, threshold): def train_and_infer(net, params): timestart=time.time() if args.feat: - logger.info('loading train set feature from: %s' % args.feat) - with open(args.feat, 'rb') as f: - train_outputs = pickle.load(f) - if args.compare_optimization: - train_output_list.append(train_outputs) - if args.enable_optimization: - train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], - torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] - if args.compare_optimization: - train_output_list.append(train_outputs) - + train_outputs=_load_training_file(args.feat, args.save_format) logger.info('loaded.') else: train_outputs = train_from_image_or_video(net, params) - if args.compare_optimization: - train_output_list.append(train_outputs) - if args.enable_optimization: - train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], - torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] - if args.compare_optimization: - train_output_list.append(train_outputs) - if args.threshold is None: + if args.threshold is None: if args.video: threshold = 0.5 gt_imgs = None @@ -424,9 +418,154 @@ def train_and_infer(net, params): infer_from_image(net, params, train_outputs, threshold, gt_imgs) logger.info('Script finished successfully.') +def _save_training_flie(train_feat_file, save_format, train_outputs): + if args.compare_optimization: + train_output_list.append(train_outputs) + + if not args.enable_optimization: + if save_format == "pkl" : + if train_feat_file==None: + print("PKL FOREVA") + train_feat_file = "train.pkl" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + with open(train_feat_file, 'wb') as f: + pickle.dump(train_outputs, f) + logger.info('Saved.') + elif save_format == "npy" : + + for i, output in enumerate(train_outputs): + if train_feat_file==None: + train_feat_file = "train_output_"+str(i)+".npy" + else: + train_feat_file = train_feat_file.split(".")[0].strip()+str(i)+".npy" + + logger.info('Saving train set feature to: %s ...' % train_feat_file) + np.save(f"train_output_{i}.npy", output) + logger.info('Saved.') + elif save_format == "pt": + if train_feat_file==None: + train_feat_file = "train.pt" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + torch.save(train_outputs, train_feat_file) + logger.info('Saved.') + + + else: + if save_format=="npy": + for i, output in enumerate(train_outputs): + if train_feat_file==None: + train_feat_file = "train_output_"+str(i)+".npy" + else: + train_feat_file = train_feat_file.split(".")[0].strip()+str(i)+".npy" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + np.save(f"train_output_{i}.npy", output) + logger.info('Saved.') + + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + if save_format == "pkl" : + if train_feat_file==None: + train_feat_file = "trainOptimized.pkl" + logger.info('saving train set feature to: %s ...' % train_feat_file) + with open(train_feat_file, 'wb') as f: + pickle.dump(train_outputs, f) + logger.info('Saved.') + elif save_format == "pt": + if train_feat_file==None: + train_feat_file = "trainOptimized.pt" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + torch.save(train_outputs, train_feat_file) + logger.info('Saved.') + + + if args.compare_optimization: + train_output_list.append(train_outputs) + + return train_outputs + +def _load_training_file(train_feat_file, save_format): + if _check_file_exists(train_feat_file, save_format): + if not train_feat_file: + train_feat_file = "trainOptimized."+save_format + else: + save_format=train_feat_file.split(".")[1].strip() + logger.info("Save format ", save_format) + + if args.enable_optimization: + + + if save_format== "pkl": + logger.info(f"Loading {train_feat_file}") + with open(train_feat_file, 'rb') as f: + train_outputs = pickle.load(f) + elif save_format == "npy": + train_outputs = [] + i = 0 + if train_feat_file: + train_feat_file=train_feat_file.split(".")[0].strip() + else: + train_feat_file="train_output_" + while True: + try: + logger.info(f"{train_feat_file}_{i}.npy") + train_outputs.append(np.load(f"{train_feat_file}{i}.npy", allow_pickle=True)) + i += 1 + except FileNotFoundError: + break # Stop when there are no more files to load + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + elif save_format == "pt": + logger.info(f"Loading {train_feat_file}") + train_outputs = torch.load(train_feat_file) + else: + train_feat_file = "train."+save_format + if save_format == "pkl": + logger.info(f"Loading {train_feat_file}") + with open(train_feat_file, 'rb') as f: + train_outputs = pickle.load(f) + elif save_format == "npy": + train_outputs = [] + i = 0 + while True: + try: + logger.info(f"Loading train_output_{i}.npy") + train_outputs.append(np.load(f"train_output_{i}.npy", allow_pickle=True)) + i += 1 + except FileNotFoundError: + break # Stop when there are no more files to load + + elif save_format == "pt": + logger.info(f"Loading {train_feat_file}") + train_outputs = torch.load(train_feat_file) + train_outputs_numpy = [] + if train_outputs[0] is torch.Tensor: + for item in train_outputs: + if isinstance(item, torch.Tensor): + train_outputs_numpy.append(item.cpu().numpy()) # Move to CPU and convert to NumPy + else: + train_outputs_numpy.append(item) + return train_outputs_numpy + return train_outputs + +def _check_file_exists(train_feat_file, save_format): + if train_feat_file == None: + if save_format=="npy": + filename="train_output_0.npy" + elif args.enable_optimization: + filename="trainOptimized."+save_format + else: + filename="train."+save_format + if not os.path.isfile(filename): + logger.info(f"File {filename} does not exist. Unable to load the model") + else: + return os.path.isfile(train_feat_file) + + return os.path.isfile(filename) + def main(): # model files check and download + starttime=time.time() weight_path, model_path, params = get_params(args.arch) check_and_download_models(weight_path, model_path, REMOTE_PATH) @@ -435,6 +574,7 @@ def main(): # check input train_and_infer(net, params) + logger.info('Script finished execution time: '+str(int((time.time()-starttime)*1000))) if __name__ == '__main__': diff --git a/anomaly_detection/padim/padim_gui.py b/anomaly_detection/padim/padim_gui.py index d9ebd476c..ddd570fe4 100644 --- a/anomaly_detection/padim/padim_gui.py +++ b/anomaly_detection/padim/padim_gui.py @@ -40,6 +40,7 @@ model_index = 0 slider_index = 50 + REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/padim/' train_folder = None @@ -47,6 +48,9 @@ test_type = "folder" test_roi = None score_cache = {} +EnableOptimization = True +save_format="pkl" +BENCHMARK = False # ====================== # List box cursor changed @@ -91,6 +95,39 @@ def slider_changed(event): global scale, slider_index slider_index = scale.get() +def enable_optimization(event): + global EnableOptimization + selection = event.widget.curselection() + if selection: + selected_index = selection[0] + selected_value = event.widget.get(selected_index) + EnableOptimization = (selected_value == "True") + else: + EnableOptimization = False + logger.info(f"EnableOptimization set to: {EnableOptimization}") + +def save_type_select(event): + global save_format + selection = event.widget.curselection() + if selection: + selected_index = selection[0] + selected_value = event.widget.get(selected_index) + save_format = selected_value + else: + save_format = "pkl" + logger.info(f"Selected format set to: {save_format}") + +def enable_benchmark(event): + global BENCHMARK + selection = event.widget.curselection() + if selection: + selected_index = selection[0] + selected_value = event.widget.get(selected_index) + BENCHMARK = (selected_value == "True") + else: + BENCHMARK = False + logger.info(f"BENCHMARK set to: {BENCHMARK}") + # ====================== # List box double click # ====================== @@ -147,7 +184,7 @@ def create_photo_image(path,w=CANVAS_W,h=CANVAS_H): #image_bgr = cv2.resize(image_bgr,(w,h)) image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) # imreadはBGRなのでRGBに変換 image_pil = Image.fromarray(image_rgb) # RGBからPILフォーマットへ変換 - image_pil.thumbnail((w,h), Image.ANTIALIAS) + image_pil.thumbnail((w,h), Image.LANCZOS) image_tk = ImageTk.PhotoImage(image_pil) # ImageTkフォーマットへ変換 return image_tk @@ -226,16 +263,51 @@ def train_button_clicked(): aug = False aug_num = 0 seed = 1024 - train_outputs = training(net, params, get_image_resize(), get_image_crop_size(), get_keep_aspect(), batch_size, train_dir, aug, aug_num, seed, logger) + train_outputs=training_optimized(net, params, get_image_resize(), get_image_crop_size(), get_keep_aspect(), batch_size, train_dir, aug, aug_num, seed, logger) + if not EnableOptimization: + if save_format == "pkl" : + train_feat_file = "train.pkl" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + with open(train_feat_file, 'wb') as f: + pickle.dump(train_outputs, f) + logger.info('Saved.') + elif save_format == "npy" : + + for i, output in enumerate(train_outputs): + train_feat_file = "train_output_"+str(i)+".npy" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + np.save(f"train_output_{i}.npy", output) + logger.info('Saved.') + elif save_format == "pt": + train_feat_file = "train.pt" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + torch.save(train_outputs, train_feat_file) + logger.info('Saved.') - # save learned distribution - train_feat_file = "train.pkl" - #train_dir = args.train_dir - #train_feat_file = "%s.pkl" % os.path.basename(train_dir) - logger.info('saving train set feature to: %s ...' % train_feat_file) - with open(train_feat_file, 'wb') as f: - pickle.dump(train_outputs, f) - logger.info('saved.') + else: + if save_format=="npy": + for i, output in enumerate(train_outputs): + train_feat_file = "train_output_"+str(i)+".npy" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + np.save(f"train_output_{i}.npy", output) + logger.info('Saved.') + + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + if save_format == "pkl" : + train_feat_file = "trainOptimized.pkl" + logger.info('saving train set feature to: %s ...' % train_feat_file) + with open(train_feat_file, 'wb') as f: + pickle.dump(train_outputs, f) + logger.info('Saved.') + elif save_format == "pt": + train_feat_file = "trainOptimized.pt" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + torch.save(train_outputs, train_feat_file) + logger.info('Saved.') + + + global score_cache score_cache = {} @@ -259,11 +331,57 @@ def test_button_clicked(): # create net instance env_id = ailia.get_gpu_environment_id() net = ailia.Net(model_path, weight_path, env_id=env_id) + if _check_file_exists(save_format): + if EnableOptimization: + train_feat_file = "trainOptimized."+save_format + + if save_format== "pkl": + logger.info(f"Loading {train_feat_file}") + with open(train_feat_file, 'rb') as f: + train_outputs = pickle.load(f) + elif save_format == "npy": + train_outputs = [] + i = 0 + while True: + try: + logger.info(f"Loading train_output_{i}.npy") + train_outputs.append(np.load(f"train_output_{i}.npy", allow_pickle=True)) + i += 1 + except FileNotFoundError: + break # Stop when there are no more files to load + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + elif save_format == "pt": + logger.info(f"Loading {train_feat_file}") + train_outputs = torch.load(train_feat_file) + else: + train_feat_file = "train."+save_format + if save_format == "pkl": + logger.info(f"Loading {train_feat_file}") + with open(train_feat_file, 'rb') as f: + train_outputs = pickle.load(f) + elif save_format == "npy": + train_outputs = [] + i = 0 + while True: + try: + logger.info(f"Loading train_output_{i}.npy") + train_outputs.append(np.load(f"train_output_{i}.npy", allow_pickle=True)) + i += 1 + except FileNotFoundError: + break # Stop when there are no more files to load + + elif save_format == "pt": + logger.info(f"Loading {train_feat_file}") + train_outputs = torch.load(train_feat_file) + train_outputs_numpy = [] + if train_outputs[0] is torch.Tensor: + for item in train_outputs: + if isinstance(item, torch.Tensor): + train_outputs_numpy.append(item.cpu().numpy()) # Move to CPU and convert to NumPy + else: + train_outputs_numpy.append(item) - # load trained model - with open("train.pkl", 'rb') as f: - train_outputs = pickle.load(f) - threshold = slider_index / 100.0 if test_type == "folder": @@ -271,6 +389,18 @@ def test_button_clicked(): else: test_from_video(net, params, train_outputs, threshold) +def _check_file_exists(save_format): + if save_format=="npy": + filename="train_output_0.npy" + elif EnableOptimization: + filename="trainOptimized."+save_format + else: + filename="train."+save_format + if not os.path.isfile(filename): + logger.info(f"File {filename} does not exist. Unable to load the model") + + return os.path.isfile(filename) + def test_from_folder(net, params, train_outputs, threshold): # file loop test_imgs = [] @@ -282,7 +412,7 @@ def test_from_folder(net, params, train_outputs, threshold): roi_img = preprocess(roi_img, get_image_resize(), keep_aspect=get_keep_aspect(), crop_size=get_image_crop_size(), mask=True) else: roi_img = None - + score_map = [] for i_img in range(0, len(test_list)): logger.info('from (%s) ' % (test_list[i_img])) @@ -296,7 +426,34 @@ def test_from_folder(net, params, train_outputs, threshold): if image_path in score_cache: dist_tmp = score_cache[image_path].copy() else: - dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) + if BENCHMARK: + import time + logger.info('BENCHMARK mode') + total_time = 0 + if EnableOptimization: + for i in range(6): + start = int(round(time.time() * 1000)) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device) + end = int(round(time.time() * 1000)) + logger.info(f'\tailia processing time {end - start} ms') + if i != 0: + total_time = total_time + (end - start) + logger.info(f'\taverage time {total_time / 5} ms') + else: + for i in range(6): + start = int(round(time.time() * 1000)) + dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) + end = int(round(time.time() * 1000)) + logger.info(f'\tailia processing time {end - start} ms') + if i != 0: + total_time = total_time + (end - start) + logger.info(f'\taverage time {total_time / 5} ms') + + else: + if EnableOptimization: + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device) + else: + dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) score_cache[image_path] = dist_tmp.copy() score_map.append(dist_tmp) @@ -345,8 +502,10 @@ def test_from_video(net, params, train_outputs, threshold): img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) img = preprocess(img, get_image_resize(), keep_aspect=get_keep_aspect(), crop_size=get_image_crop_size()) - - dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) + if EnableOptimization: + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device) + else: + dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) score_map.append(dist_tmp) roi_img = None @@ -537,6 +696,8 @@ def get_model_id_list(): def get_model_resolution_list(): return [224, 448, 224] +def get_optimization_list(): + return [True, False] # ====================== # GUI # ====================== @@ -552,6 +713,15 @@ def main(): global outputFile, listsOutput, output_list, ListboxOutput global listsModel, ListboxModel global valueKeepAspect, valueCenterCrop + if EnableOptimization: + import torch + global device + if torch.cuda.is_available() : + device = torch.device("cuda") + + else: + device = torch.device("cpu") + logger.info("Torch device : " + str(device)) # rootメインウィンドウの設定 root = tk.Tk() @@ -567,18 +737,24 @@ def main(): test_list = get_test_file_list() result_list = get_result_file_list() model_list = get_model_list() + listsInput = tk.StringVar(value=train_list) listsOutput = tk.StringVar(value=test_list) listsResult = tk.StringVar(value=result_list) listsModel = tk.StringVar(value=model_list) + + # 各種ウィジェットの作成 ListboxInput = tk.Listbox(frame, listvariable=listsInput, width=20, height=12, selectmode=tk.BROWSE, exportselection=False) ListboxOutput = tk.Listbox(frame, listvariable=listsOutput, width=20, height=12, selectmode=tk.BROWSE, exportselection=False) ListboxResult = tk.Listbox(frame, listvariable=listsResult, width=20, height=12, selectmode=tk.BROWSE, exportselection=False) ListboxModel = tk.Listbox(frame, listvariable=listsModel, width=20, height=6, selectmode=tk.BROWSE, exportselection=False) + + # Bind the listbox selection event to the toggle_optimization function + ListboxInput.bind("<>", input_changed) ListboxOutput.bind("<>", output_changed) ListboxResult.bind("<>", result_changed) @@ -587,11 +763,13 @@ def main(): ListboxInput.bind("", input_double_click) ListboxOutput.bind("", output_double_click) ListboxResult.bind("", result_double_click) + ListboxInput.select_set(input_index) ListboxOutput.select_set(output_index) ListboxResult.select_set(result_index) ListboxModel.select_set(model_index) + textRun = tk.StringVar(frame) textRun.set("Train") @@ -650,6 +828,15 @@ def main(): textSave = tk.StringVar(frame) textSave.set("Save images") + textOptimization = tk.StringVar(frame) + textOptimization.set("Set optimization") + + textSaveFile = tk.StringVar(frame) + textSaveFile.set("Trained file format") + + textBenchmark = tk.StringVar(frame) + textBenchmark.set("Benchmark mode") + valueKeepAspect = tkinter.BooleanVar() valueKeepAspect.set(True) valueCenterCrop = tkinter.BooleanVar() @@ -667,6 +854,10 @@ def main(): labelModel = tk.Label(frame, textvariable=textModel) labelTestSettings = tk.Label(frame, textvariable=textTestSettings) labelSlider = tk.Label(frame, textvariable=textSlider) + labelOPt = tk.Label(frame, textvariable=textOptimization) + labelSaveFile = tk.Label(frame, textvariable=textSaveFile) + labelBenchmark = tk.Label(frame, textvariable=textBenchmark) + buttonTrain = tk.Button(frame, textvariable=textRun, command=train_button_clicked, width=14) buttonTest = tk.Button(frame, textvariable=textStop, command=test_button_clicked, width=14) @@ -738,12 +929,55 @@ def main(): labelSlider.grid(row=9, column=4, sticky=tk.NW, columnspan=3) scale.grid(row=10, column=4, sticky=tk.NW, columnspan=3) + options = ["False", "True"] + listsOPt = tk.StringVar(value=options) + labelOPt.grid(row=11, column=0, sticky=tk.NW, columnspan=3) + ListboxOptimization = tk.Listbox(frame, listvariable=listsOPt, width=20, height=len(options), selectmode=tk.BROWSE, exportselection=False) + ListboxOptimization.grid(row=11, column=0, padx=0, pady=20) + + # Set the initial selection in the Listbox + initial_selection = options.index("True") # Default to "True" + ListboxOptimization.select_set(initial_selection) + ListboxOptimization.event_generate('<>') # Trigger the event to set the initial state + + # Bind the listbox selection event to the toggle_optimization function + ListboxOptimization.bind('<>', enable_optimization) + + fileOptions = ["pkl", "pt", "npy"] + listsFileOPt = tk.StringVar(value=fileOptions) + labelSaveFile.grid(row=12, column=0, sticky=tk.NW, columnspan=3) + ListboxFileSelect = tk.Listbox(frame, listvariable=listsFileOPt, width=20, height=len(fileOptions), selectmode=tk.BROWSE, exportselection=False) + ListboxFileSelect.grid(row=12, column=0, padx=0, pady=20) + + # Set the initial selection in the Listbox + initial_selectionFile = fileOptions.index("pkl") # Default to "pkl" + ListboxFileSelect.select_set(initial_selectionFile) + ListboxFileSelect.event_generate('<>') # Trigger the event to set the initial state + + # Bind the listbox selection event to the save_type_select function + ListboxFileSelect.bind('<>', save_type_select) + + fileBenchmark = ["True", "False"] + listsFileBenchmark = tk.StringVar(value=fileBenchmark) + labelBenchmark.grid(row=12, column=4, sticky=tk.NW) + ListboxFileBenchmark = tk.Listbox(frame, listvariable=listsFileBenchmark, width=20, height=len(fileOptions), selectmode=tk.BROWSE, exportselection=False) + ListboxFileBenchmark.grid(row=12, column=4, padx=0, pady=20) + + # Set the initial selection in the Listbox + initial_selection_benchmark = fileBenchmark.index("False") # Default to "False" + ListboxFileBenchmark.select_set(initial_selection_benchmark) + ListboxFileBenchmark.event_generate('<>') # Trigger the event to set the initial state + + # Bind the listbox selection event to the save_type_select function + ListboxFileBenchmark.bind('<>', enable_benchmark) # メインフレームの作成と設置 frame = ttk.Frame(root) frame.pack(padx=20, pady=10) root.mainloop() + + if __name__ == '__main__': main() From eabcd4ea4227edc6c99c2505eb1133aeeb33ce74 Mon Sep 17 00:00:00 2001 From: YToleubay <70749179+YToleubay@users.noreply.github.com> Date: Fri, 31 May 2024 12:49:34 +0900 Subject: [PATCH 05/15] fixed typo in README.md --- anomaly_detection/padim/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anomaly_detection/padim/README.md b/anomaly_detection/padim/README.md index 9c9ce4849..5df56d8eb 100644 --- a/anomaly_detection/padim/README.md +++ b/anomaly_detection/padim/README.md @@ -71,7 +71,7 @@ You can specify the directory of normal product files with the `--train_dir` opt $ python3 padim.py --train_dir train ``` -The feature vectors created from files in the train directory can be saved to the pickle, numpy or pickle file. For that you have to specify desired format as `npy`, `pt` or `pkl`. Depending on --enable_optimization flag, files saved might be originally pytorch tensors (True) or numpy arrays (False). +The feature vectors created from files in the train directory can be saved to the pickle, numpy or pytorch file. For that you have to specify desired format as `npy`, `pt` or `pkl`. Depending on --enable_optimization flag, files saved might be originally pytorch tensors (True) or numpy arrays (False). ```bash $ python3 padim.py --save_format pt ``` From eb87974bd0b07406f0201d678df4d4a09788becd Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 3 Jun 2024 14:02:59 +0900 Subject: [PATCH 06/15] Fixed issue with file loading and saving --- anomaly_detection/padim/padim.py | 43 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index a5e9bd713..46f4c4a15 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -425,22 +425,22 @@ def _save_training_flie(train_feat_file, save_format, train_outputs): if not args.enable_optimization: if save_format == "pkl" : if train_feat_file==None: - print("PKL FOREVA") train_feat_file = "train.pkl" logger.info('Saving train set feature to: %s ...' % train_feat_file) with open(train_feat_file, 'wb') as f: pickle.dump(train_outputs, f) logger.info('Saved.') elif save_format == "npy" : - + filename=train_feat_file.split(".")[0].strip() for i, output in enumerate(train_outputs): + if train_feat_file==None: train_feat_file = "train_output_"+str(i)+".npy" else: - train_feat_file = train_feat_file.split(".")[0].strip()+str(i)+".npy" + train_feat_file = filename+str("_")+str(i)+".npy" logger.info('Saving train set feature to: %s ...' % train_feat_file) - np.save(f"train_output_{i}.npy", output) + np.save(f"{filename}_{i}.npy", output) logger.info('Saved.') elif save_format == "pt": if train_feat_file==None: @@ -452,13 +452,14 @@ def _save_training_flie(train_feat_file, save_format, train_outputs): else: if save_format=="npy": + filename=train_feat_file.split(".")[0].strip() for i, output in enumerate(train_outputs): if train_feat_file==None: train_feat_file = "train_output_"+str(i)+".npy" else: - train_feat_file = train_feat_file.split(".")[0].strip()+str(i)+".npy" + train_feat_file = filename+"_"+str(i)+".npy" logger.info('Saving train set feature to: %s ...' % train_feat_file) - np.save(f"train_output_{i}.npy", output) + np.save(f"{filename}_{i}.npy", output) logger.info('Saved.') train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], @@ -489,7 +490,7 @@ def _load_training_file(train_feat_file, save_format): train_feat_file = "trainOptimized."+save_format else: save_format=train_feat_file.split(".")[1].strip() - logger.info("Save format ", save_format) + logger.info(f"Save format {save_format}") if args.enable_optimization: @@ -502,13 +503,15 @@ def _load_training_file(train_feat_file, save_format): train_outputs = [] i = 0 if train_feat_file: - train_feat_file=train_feat_file.split(".")[0].strip() + train_feat_file=train_feat_file.split("_")[0].strip() else: - train_feat_file="train_output_" + train_feat_file="train_" + while True: try: + logger.info(f"{train_feat_file}_{i}.npy") - train_outputs.append(np.load(f"{train_feat_file}{i}.npy", allow_pickle=True)) + train_outputs.append(np.load(f"{train_feat_file}_{i}.npy", allow_pickle=True)) i += 1 except FileNotFoundError: break # Stop when there are no more files to load @@ -518,23 +521,30 @@ def _load_training_file(train_feat_file, save_format): logger.info(f"Loading {train_feat_file}") train_outputs = torch.load(train_feat_file) else: - train_feat_file = "train."+save_format + if save_format == "pkl": + train_feat_file = "train."+save_format logger.info(f"Loading {train_feat_file}") with open(train_feat_file, 'rb') as f: train_outputs = pickle.load(f) elif save_format == "npy": train_outputs = [] i = 0 + if train_feat_file: + train_feat_file=train_feat_file.split("_")[0].strip() + else: + train_feat_file="train_" while True: try: - logger.info(f"Loading train_output_{i}.npy") - train_outputs.append(np.load(f"train_output_{i}.npy", allow_pickle=True)) + + logger.info(f"{train_feat_file}_{i}.npy") + train_outputs.append(np.load(f"{train_feat_file}_{i}.npy", allow_pickle=True)) i += 1 except FileNotFoundError: - break # Stop when there are no more files to load + break # Stop when there are no more files to load elif save_format == "pt": + train_feat_file = "train."+save_format logger.info(f"Loading {train_feat_file}") train_outputs = torch.load(train_feat_file) train_outputs_numpy = [] @@ -545,7 +555,10 @@ def _load_training_file(train_feat_file, save_format): else: train_outputs_numpy.append(item) return train_outputs_numpy - return train_outputs + return train_outputs + else: + logger.info(f"filename {train_feat_file} have not been found") + def _check_file_exists(train_feat_file, save_format): if train_feat_file == None: From b6fd11ed84ceb06573c43871cc7eda72187579f5 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 3 Jun 2024 15:52:59 +0900 Subject: [PATCH 07/15] added support of optimization device selection --- anomaly_detection/padim/padim.py | 28 ++++++++++++++++---------- anomaly_detection/padim/padim_utils.py | 8 +++++++- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index 46f4c4a15..34b206d7c 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -97,6 +97,12 @@ '--save_format', metavar="FILE", default="pkl", help='chose training file format pt, npy or pkl.' ) + +parser.add_argument( + '--optimization_device', metavar="device", default='cpu', choices=('cpu', 'cuda', 'mps'), + help='chose optimization device' +) + args = update_parser(parser) if args.compare_optimization: @@ -105,10 +111,10 @@ if args.enable_optimization: import torch - if torch.cuda.is_available() : + if torch.cuda.is_available() and args.optimization_device=="cuda" : device = torch.device("cuda") - elif torch.backends.mps.is_available(): + elif torch.backends.mps.is_available() and args.optimization_device=="mps" : device = torch.device("mps") else: device = torch.device("cpu") @@ -192,7 +198,7 @@ def infer_init_run(net, params, train_outputs, IMAGE_SIZE): dummy_image = dummy_image.astype(np.float32) logger.info(f"PaDiM initialization inference starts!") if args.enable_optimization: - score = infer_optimized(net, params, train_outputs, dummy_image, IMAGE_SIZE, device) + score = infer_optimized(net, params, train_outputs, dummy_image, IMAGE_SIZE, device, logger) else: score = infer(net, params, train_outputs, dummy_image, IMAGE_SIZE) logger.info(f"PaDiM initialization inference finish!") @@ -246,7 +252,7 @@ def decide_threshold_from_gt_image(net, params, train_outputs, gt_imgs): img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) if args.enable_optimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) else: dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) @@ -284,7 +290,7 @@ def infer_from_image(net, params, train_outputs, threshold, gt_imgs): if args.enable_optimization: for i in range(args.benchmark_count): start = int(round(time.time() * 1000)) - dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) end = int(round(time.time() * 1000)) logger.info(f'\tailia processing time {end - start} ms') if i != 0: @@ -300,16 +306,16 @@ def infer_from_image(net, params, train_outputs, threshold, gt_imgs): total_time = total_time + (end - start) logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') if args.compare_optimization: - logger.info(f'\tResults of optimized and original code is the same: {np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device))}') + logger.info(f'\tResults of optimized and original code is the same: {np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger))}') else: if args.enable_optimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) else: dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) if args.compare_optimization: - logger.info('Results of optimized and original code is the same: '+ str(np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device)))) + logger.info('Results of optimized and original code is the same: '+ str(np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger)))) score_map.append(dist_tmp) @@ -345,7 +351,7 @@ def infer_from_video(net, params, train_outputs, threshold): img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) - dist_tmp = infer_optimized(net, params, train_outputs, img, device) + dist_tmp = infer_optimized(net, params, train_outputs, img, device,logger) score_map.append(dist_tmp) scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production @@ -461,9 +467,9 @@ def _save_training_flie(train_feat_file, save_format, train_outputs): logger.info('Saving train set feature to: %s ...' % train_feat_file) np.save(f"{filename}_{i}.npy", output) logger.info('Saved.') - + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], - torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] if save_format == "pkl" : if train_feat_file==None: train_feat_file = "trainOptimized.pkl" diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index 54319deb4..19166bf30 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -18,6 +18,7 @@ from skimage.segmentation import mark_boundaries import torch + WEIGHT_RESNET18_PATH = 'resnet18.onnx' MODEL_RESNET18_PATH = 'resnet18.onnx.prototxt' WEIGHT_WIDE_RESNET50_2_PATH = 'wide_resnet50_2.onnx' @@ -487,7 +488,7 @@ def infer(net, params, train_outputs, img, crop_size): return dist_tmp -def infer_optimized(net, params, train_outputs, img, crop_size, device): +def infer_optimized(net, params, train_outputs, img, crop_size, device, logger): # prepare input data imgs = [] imgs.append(img) @@ -519,6 +520,11 @@ def infer_optimized(net, params, train_outputs, img, crop_size, device): mean_vectors = train_outputs[0] inv_cov_matrices = train_outputs[2] samples = embedding_vectors[0] + if mean_vectors.device!=device: + logger.info(f"Changing device from {mean_vectors.device} to {device}") + mean_vectors=mean_vectors.to(device) + samples=samples.to(device) + inv_cov_matrices=inv_cov_matrices.to(device) # Step 1: Compute the difference between each sample and its corresponding mean differences = samples - mean_vectors # Step 2: Apply the inverse covariance matrix From 46dfab8dbab88fe5168685f6caa9e8f059af3368 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 3 Jun 2024 16:27:42 +0900 Subject: [PATCH 08/15] changed mps device check --- anomaly_detection/padim/padim.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index 34b206d7c..b1af42ee7 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -111,10 +111,10 @@ if args.enable_optimization: import torch - if torch.cuda.is_available() and args.optimization_device=="cuda" : + if args.optimization_device=="cuda" and torch.cuda.is_available(): device = torch.device("cuda") - elif torch.backends.mps.is_available() and args.optimization_device=="mps" : + elif args.optimization_device=="mps" and torch.backends.mps.is_available(): device = torch.device("mps") else: device = torch.device("cpu") From 737dbcff793466d71028e00061cc932d58414382 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 3 Jun 2024 17:24:35 +0900 Subject: [PATCH 09/15] fixed issue with infer_optimized --- anomaly_detection/padim/padim_gui.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/anomaly_detection/padim/padim_gui.py b/anomaly_detection/padim/padim_gui.py index ddd570fe4..517dd4e4b 100644 --- a/anomaly_detection/padim/padim_gui.py +++ b/anomaly_detection/padim/padim_gui.py @@ -433,7 +433,7 @@ def test_from_folder(net, params, train_outputs, threshold): if EnableOptimization: for i in range(6): start = int(round(time.time() * 1000)) - dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger) end = int(round(time.time() * 1000)) logger.info(f'\tailia processing time {end - start} ms') if i != 0: @@ -451,7 +451,7 @@ def test_from_folder(net, params, train_outputs, threshold): else: if EnableOptimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger) else: dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) score_cache[image_path] = dist_tmp.copy() @@ -503,7 +503,7 @@ def test_from_video(net, params, train_outputs, threshold): img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) img = preprocess(img, get_image_resize(), keep_aspect=get_keep_aspect(), crop_size=get_image_crop_size()) if EnableOptimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger) else: dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) From bae3d2f74373cdfcdb800665538323f51eee08df Mon Sep 17 00:00:00 2001 From: YToleubay Date: Thu, 6 Jun 2024 15:14:33 +0900 Subject: [PATCH 10/15] added MPS to padim_gui --- anomaly_detection/padim/padim_gui.py | 52 ++++++++++++++++++++------ anomaly_detection/padim/padim_utils.py | 2 +- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/anomaly_detection/padim/padim_gui.py b/anomaly_detection/padim/padim_gui.py index 517dd4e4b..d215ed66a 100644 --- a/anomaly_detection/padim/padim_gui.py +++ b/anomaly_detection/padim/padim_gui.py @@ -51,6 +51,12 @@ EnableOptimization = True save_format="pkl" BENCHMARK = False +selected_device = "cpu" +if EnableOptimization: + import torch + + device = torch.device("cpu") + logger.info("Torch device : " + str(device)) # ====================== # List box cursor changed @@ -128,6 +134,17 @@ def enable_benchmark(event): BENCHMARK = False logger.info(f"BENCHMARK set to: {BENCHMARK}") +def select_device(event): + global device + selection = event.widget.curselection() + if selection: + selected_index = selection[0] + selected_value = event.widget.get(selected_index) + device = torch.device(selected_value) + else: + device = torch.device("cpu") + logger.info(f"Device set to: {device}") + # ====================== # List box double click # ====================== @@ -713,15 +730,7 @@ def main(): global outputFile, listsOutput, output_list, ListboxOutput global listsModel, ListboxModel global valueKeepAspect, valueCenterCrop - if EnableOptimization: - import torch - global device - if torch.cuda.is_available() : - device = torch.device("cuda") - - else: - device = torch.device("cpu") - logger.info("Torch device : " + str(device)) + # rootメインウィンドウの設定 root = tk.Tk() @@ -837,6 +846,9 @@ def main(): textBenchmark = tk.StringVar(frame) textBenchmark.set("Benchmark mode") + textDevice = tk.StringVar(frame) + textDevice.set("Optimization device") + valueKeepAspect = tkinter.BooleanVar() valueKeepAspect.set(True) valueCenterCrop = tkinter.BooleanVar() @@ -857,6 +869,7 @@ def main(): labelOPt = tk.Label(frame, textvariable=textOptimization) labelSaveFile = tk.Label(frame, textvariable=textSaveFile) labelBenchmark = tk.Label(frame, textvariable=textBenchmark) + labelDevice = tk.Label(frame, textvariable=textDevice) buttonTrain = tk.Button(frame, textvariable=textRun, command=train_button_clicked, width=14) @@ -957,11 +970,28 @@ def main(): # Bind the listbox selection event to the save_type_select function ListboxFileSelect.bind('<>', save_type_select) + + fileDevice = ["cpu", "cuda", "mps"] + listsFileDevice = tk.StringVar(value=fileDevice) + labelDevice.grid(row=11, column=4, sticky=tk.NW) + ListboxDeviceSelect = tk.Listbox(frame, listvariable=listsFileDevice, width=20, height=len(fileDevice), selectmode=tk.BROWSE, exportselection=False) + ListboxDeviceSelect.grid(row=11, column=4, padx=0, pady=20) + + # Set the initial selection in the Listbox + initial_selection_Device = fileDevice.index("cpu") # Default to "False" + ListboxDeviceSelect.select_set(initial_selection_Device) + ListboxDeviceSelect.event_generate('<>') # Trigger the event to set the initial state + + # Bind the listbox selection event to the save_type_select function + ListboxDeviceSelect.bind('<>', select_device) + # メインフレームの作成と設置 + + fileBenchmark = ["True", "False"] listsFileBenchmark = tk.StringVar(value=fileBenchmark) - labelBenchmark.grid(row=12, column=4, sticky=tk.NW) + labelBenchmark.grid(row=11, column=5, sticky=tk.NW) ListboxFileBenchmark = tk.Listbox(frame, listvariable=listsFileBenchmark, width=20, height=len(fileOptions), selectmode=tk.BROWSE, exportselection=False) - ListboxFileBenchmark.grid(row=12, column=4, padx=0, pady=20) + ListboxFileBenchmark.grid(row=11, column=5, padx=0, pady=20) # Set the initial selection in the Listbox initial_selection_benchmark = fileBenchmark.index("False") # Default to "False" diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index 19166bf30..8983d0777 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -520,7 +520,7 @@ def infer_optimized(net, params, train_outputs, img, crop_size, device, logger): mean_vectors = train_outputs[0] inv_cov_matrices = train_outputs[2] samples = embedding_vectors[0] - if mean_vectors.device!=device: + if str(device) not in str(mean_vectors.device): logger.info(f"Changing device from {mean_vectors.device} to {device}") mean_vectors=mean_vectors.to(device) samples=samples.to(device) From b530115ce0794b8187bc69e0524c20e7780fa7f7 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 1 Jul 2024 16:11:20 +0900 Subject: [PATCH 11/15] added limit to memory allocation size by gpu to improve performance --- anomaly_detection/padim/padim_gui.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/anomaly_detection/padim/padim_gui.py b/anomaly_detection/padim/padim_gui.py index d215ed66a..20ee74bb4 100644 --- a/anomaly_detection/padim/padim_gui.py +++ b/anomaly_detection/padim/padim_gui.py @@ -141,8 +141,12 @@ def select_device(event): selected_index = selection[0] selected_value = event.widget.get(selected_index) device = torch.device(selected_value) + if selected_value=="cuda:0": + torch.cuda.set_per_process_memory_fraction(fraction=0.5, device="cuda:0") else: device = torch.device("cpu") + + logger.info(f"Device set to: {device}") # ====================== @@ -971,7 +975,7 @@ def main(): ListboxFileSelect.bind('<>', save_type_select) - fileDevice = ["cpu", "cuda", "mps"] + fileDevice = ["cpu", "cuda:0", "mps"] listsFileDevice = tk.StringVar(value=fileDevice) labelDevice.grid(row=11, column=4, sticky=tk.NW) ListboxDeviceSelect = tk.Listbox(frame, listvariable=listsFileDevice, width=20, height=len(fileDevice), selectmode=tk.BROWSE, exportselection=False) From 6a4b09b2a7479aa35e08f9e900a333f13b36585e Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 8 Jul 2024 12:01:20 +0900 Subject: [PATCH 12/15] added custom gausian filter converted rest of the pipeline to torch --- anomaly_detection/padim/padim_gui.py | 52 ++- anomaly_detection/padim/padim_utils.py | 81 +++- anomaly_detection/padim_old/padim.py | 605 +++++++++++++++++++++++++ 3 files changed, 720 insertions(+), 18 deletions(-) create mode 100644 anomaly_detection/padim_old/padim.py diff --git a/anomaly_detection/padim/padim_gui.py b/anomaly_detection/padim/padim_gui.py index 20ee74bb4..6452c1df5 100644 --- a/anomaly_detection/padim/padim_gui.py +++ b/anomaly_detection/padim/padim_gui.py @@ -56,6 +56,9 @@ import torch device = torch.device("cpu") + weights_torch=gaussian_kernel1d_torch(4, 0, int(4.0*float(4)+0.5), device).unsqueeze(0).unsqueeze(0).expand(1, 1, 33) + + logger.info("Torch device : " + str(device)) # ====================== @@ -135,12 +138,14 @@ def enable_benchmark(event): logger.info(f"BENCHMARK set to: {BENCHMARK}") def select_device(event): - global device + global device, weights_torch selection = event.widget.curselection() if selection: selected_index = selection[0] selected_value = event.widget.get(selected_index) device = torch.device(selected_value) + weights_torch = gaussian_kernel1d_torch(4, 0, int(4.0*float(4)+0.5), device).unsqueeze(0).unsqueeze(0).expand(1, 1, 33) + if selected_value=="cuda:0": torch.cuda.set_per_process_memory_fraction(fraction=0.5, device="cuda:0") else: @@ -454,7 +459,7 @@ def test_from_folder(net, params, train_outputs, threshold): if EnableOptimization: for i in range(6): start = int(round(time.time() * 1000)) - dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger, weights_torch) end = int(round(time.time() * 1000)) logger.info(f'\tailia processing time {end - start} ms') if i != 0: @@ -472,22 +477,34 @@ def test_from_folder(net, params, train_outputs, threshold): else: if EnableOptimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger, weights_torch) + score_cache[image_path] = dist_tmp else: dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) - score_cache[image_path] = dist_tmp.copy() + score_cache[image_path] = dist_tmp.copy() score_map.append(dist_tmp) - - scores = normalize_scores(score_map, get_image_crop_size(), roi_img) - anormal_scores = calculate_anormal_scores(score_map, get_image_crop_size()) + if EnableOptimization: + scores = normalize_scores_torch(score_map, get_image_crop_size()).squeeze(0).cpu().numpy() + anormal_scores = calculate_anormal_scores_torch(score_map, get_image_crop_size()) + else: + scores = normalize_scores(score_map, get_image_crop_size(), roi_img) + anormal_scores = calculate_anormal_scores(score_map, get_image_crop_size()) # Plot gt image os.makedirs("result", exist_ok=True) global result_list, listsResult, ListboxResult result_list = [] - for i in range(0, scores.shape[0]): + scores_numpy=np.zeros(scores.shape) + for i in range(0, len(test_imgs)): img = denormalization(test_imgs[i]) heat_map, mask, vis_img = visualize(img, scores[i], threshold) + """ + if EnableOptimization: + scores_numpy[i] = scores[i].squeeze(0).cpu().numpy() + heat_map, mask, vis_img = visualize(img, scores[i].squeeze(0).cpu().numpy(), threshold) + else: + heat_map, mask, vis_img = visualize(img, scores[i], threshold) + """ frame = pack_visualize(heat_map, mask, vis_img, scores, get_image_crop_size()) dirname, path = os.path.split(test_list[i]) output_path = "result/"+path @@ -524,18 +541,23 @@ def test_from_video(net, params, train_outputs, threshold): img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) img = preprocess(img, get_image_resize(), keep_aspect=get_keep_aspect(), crop_size=get_image_crop_size()) if EnableOptimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, get_image_crop_size(), device, logger, weights_torch), + score_map.append(dist_tmp) + roi_img = None + scores = normalize_scores_torch(score_map, get_image_crop_size(), roi_img) + + else: dist_tmp = infer(net, params, train_outputs, img, get_image_crop_size()) - score_map.append(dist_tmp) - roi_img = None - scores = normalize_scores(score_map, get_image_crop_size(), roi_img) # min max is calculated dynamically, please set fixed min max value from calibration data for production + score_map.append(dist_tmp) + roi_img = None + scores = normalize_scores(score_map, get_image_crop_size(), roi_img) # min max is calculated dynamically, please set fixed min max value from calibration data for production - heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) - frame = pack_visualize(heat_map, mask, vis_img, scores, get_image_crop_size()) + heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) + frame = pack_visualize(heat_map, mask, vis_img, scores, get_image_crop_size()) - cv2.imshow('frame', frame) + cv2.imshow('frame', frame) frame_shown = True if writer is not None: diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index 8983d0777..76053b962 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -9,6 +9,7 @@ from image_utils import normalize_image # noqa: E402 from detector_utils import load_image # noqa: E402 + from scipy.spatial.distance import mahalanobis from scipy.ndimage import gaussian_filter @@ -17,6 +18,7 @@ from skimage import morphology from skimage.segmentation import mark_boundaries import torch +import torch.nn.functional as F WEIGHT_RESNET18_PATH = 'resnet18.onnx' @@ -488,7 +490,7 @@ def infer(net, params, train_outputs, img, crop_size): return dist_tmp -def infer_optimized(net, params, train_outputs, img, crop_size, device, logger): +def infer_optimized(net, params, train_outputs, img, crop_size, device, logger, weights_torch=None): # prepare input data imgs = [] imgs.append(img) @@ -531,18 +533,55 @@ def infer_optimized(net, params, train_outputs, img, crop_size, device, logger): transformed_differences = torch.einsum('ijk,jk->ik', inv_cov_matrices, differences) # Step 3: Compute the Mahalanobis distance dist_tmp = torch.sqrt(torch.sum(differences * transformed_differences, dim=0)) - # upsample + + dist_tmp = dist_tmp.view(1, -1).view( H, W) + """ dist_tmp = dist_tmp.view(1, -1).view( H, W).cpu().numpy() + print("crop_size ", crop_size) dist_tmp = np.array(Image.fromarray(dist_tmp).resize( (crop_size, crop_size), resample=Image.BILINEAR) ) - + print("Shape before gausian filter: ", dist_tmp.shape) # apply gaussian smoothing on the score map dist_tmp = gaussian_filter(dist_tmp, sigma=4) + print("Shape after gausian filter: ", dist_tmp.shape) + """ + + dist_tmp=F.interpolate(dist_tmp.unsqueeze(0).unsqueeze(0), + size=(crop_size, crop_size), mode='bilinear', align_corners=False).squeeze(0) + dist_tmp=gausian_filter_torch(dist_tmp, weights_torch, mode='reflect') + + return dist_tmp +def gaussian_kernel1d_torch(sigma, order, radius, device): + """ + Computes a 1-D Gaussian convolution kernel. + """ + if order < 0: + raise ValueError('order must be non-negative') + exponent_range = torch.arange(order + 1, device=device) + sigma2 = sigma * sigma + x = torch.arange(-radius, radius + 1, dtype=torch.float64, device=device) + phi_x = torch.exp(-0.5 / sigma2 * x ** 2) + phi_x = phi_x / phi_x.sum() + + if order == 0: + return phi_x + +def gausian_filter_torch(input, weights, output=None, mode='constant', cval=0.0, origin=0): + + input=input.permute(2, 0,1 ).to(dtype=torch.float64) + input_padded=F.pad(input, pad=(16, 16), mode='reflect') + output1=torch.nn.functional.conv1d(input_padded, weights.to(dtype=torch.float64) ) #torch.Size([448, 1, 448]) + + input2=output1.permute(2, 1,0 ) + input_padded2=F.pad(input2, pad=(16, 16), mode='reflect', ) + output2=torch.nn.functional.conv1d(input_padded2, weights.to(dtype=torch.float64) ).permute(1, 0,2 ) + return output2 + def normalize_scores(score_map, crop_size, roi_img = None): N = len(score_map) @@ -562,6 +601,27 @@ def normalize_scores(score_map, crop_size, roi_img = None): return scores +def normalize_scores_torch(score_map, crop_size, roi_img=None): + """ + score_map is list of torch tensors + crop size int + """ + # Convert list of tensors to a single tensor + score_map = torch.stack(score_map) + + # Handle ROI (Region of Interest) + if roi_img is not None: + roi_img = (roi_img > 0.5).float() # Threshold to binary mask + for i in range(score_map.shape[0]): + score_map[i] *= roi_img[0, 0] # Element-wise multiplication + + # Normalization using min-max scaling (avoiding division by zero) + max_score = score_map.max() + min_score = score_map.min() + scores = (score_map - min_score) / torch.clamp(max_score - min_score, min=1e-8) + + return scores + def calculate_anormal_scores(score_map, crop_size): N = len(score_map) score_map = np.vstack(score_map) @@ -571,7 +631,22 @@ def calculate_anormal_scores(score_map, crop_size): anormal_scores = np.zeros((score_map.shape[0])) for i in range(score_map.shape[0]): anormal_scores[i] = score_map[i].max() + return anormal_scores + +def calculate_anormal_scores_torch(score_map, crop_size): + N = len(score_map) + + + # Stack the score maps into a single tensor + score_map = torch.vstack(score_map) + score_map = score_map.unsqueeze(0).view(N, crop_size, crop_size) + + # Calculate anormal scores + anormal_scores = np.zeros((N)) + for i in range(score_map.shape[0]): + anormal_scores[i] = score_map[i].max().cpu().numpy() + return anormal_scores diff --git a/anomaly_detection/padim_old/padim.py b/anomaly_detection/padim_old/padim.py new file mode 100644 index 000000000..1f37833d4 --- /dev/null +++ b/anomaly_detection/padim_old/padim.py @@ -0,0 +1,605 @@ +import os +import sys +import time +from collections import OrderedDict +import random +import pickle + +import numpy as np +import cv2 +from PIL import Image +import matplotlib +import matplotlib.pyplot as plt + +import ailia + +# import original modules +sys.path.append('../../util') +from arg_utils import get_base_parser, update_parser, get_savepath # noqa: E402 +from model_utils import check_and_download_models # noqa: E402 +from detector_utils import load_image # noqa: E402 +import webcamera_utils # noqa: E402 + +# logger +from logging import getLogger # noqa: E402 + +from padim_utils import * + +logger = getLogger(__name__) + +# ====================== +# Parameters +# ====================== + +REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/padim/' + +IMAGE_PATH = './bottle_000.png' +SAVE_IMAGE_PATH = './output.png' +IMAGE_RESIZE = 256 +IMAGE_SIZE = 224 +KEEP_ASPECT = True + +# ====================== +# Arguemnt Parser Config +# ====================== + +parser = get_base_parser('PaDiM model', IMAGE_PATH, SAVE_IMAGE_PATH) +parser.add_argument( + '-a', '--arch', default='resnet18', choices=('resnet18', 'wide_resnet50_2'), + help='arch model.' +) +parser.add_argument( + '-f', '--feat', metavar="FILE", default=None, + help='train set feature files.' +) +parser.add_argument( + '-bs', '--batch_size', default=32, + help='batch size.' +) +parser.add_argument( + '-tr', '--train_dir', metavar="DIR", default="./train", + help='directory of the train files.' +) +parser.add_argument( + '-gt', '--gt_dir', metavar="DIR", default="./gt_masks", + help='directory of the ground truth mask files.' +) +parser.add_argument( + '--seed', type=int, default=1024, + help='random seed' +) +parser.add_argument( + '-th', '--threshold', type=float, default=None, + help='threshold' +) +parser.add_argument( + '-ag', '--aug', action='store_true', + help='process with augmentation.' +) +parser.add_argument( + '-an', '--aug_num', type=int, default=5, + help='specify the amplification number of augmentation.' +) +parser.add_argument( + '-eon', '--enable_optimization', type=bool, default=False, + help='Flag to enable optimized code' +) +parser.add_argument( + '--compare_optimization', type=bool, default=False, + help='Flag to compare output of optimization with original code' +) +parser.add_argument( + '--compare_optimization', type=bool, default=False, + help='Flag to compare output of optimization with original code' +) + +parser.add_argument( + '--save_format', metavar="FILE", default="pkl", + help='chose training file format pt, npy or pkl.' +) + +parser.add_argument( + '--optimization_device', metavar="device", default='cpu', choices=('cpu', 'cuda', 'mps'), + help='chose optimization device' +) + +args = update_parser(parser) + +if args.compare_optimization: + args.enable_optimization = True + train_output_list=[] + +if args.enable_optimization: + import torch + if args.optimization_device=="cuda" and torch.cuda.is_available(): + device = torch.device("cuda") + + elif args.optimization_device=="mps" and torch.backends.mps.is_available(): + device = torch.device("mps") + else: + device = torch.device("cpu") + logger.info("Torch device : " + str(device)) + + + +# ====================== +# Main functions +# ====================== + + +def plot_fig(file_list, test_imgs, scores, anormal_scores, gt_imgs, threshold, savepath): + num = len(file_list) + vmax = scores.max() * 255. + vmin = scores.min() * 255. + for i in range(num): + image_path = file_list[i] + img = test_imgs[i] + img = denormalization(img) + if gt_imgs is not None: + gt = gt_imgs[i] + gt = gt.transpose(1, 2, 0).squeeze() + else: + gt = np.zeros((1,1,1)) + print(scores[i], scores[i].shape) + heat_map, mask, vis_img = visualize(img, scores[i], threshold) + + fig_img, ax_img = plt.subplots(1, 5, figsize=(12, 3)) + fig_img.subplots_adjust(right=0.9) + + fig_img.suptitle("Input : " + image_path + " Anomaly score : " + str(anormal_scores[i])) + logger.info("Anomaly score : " + str(anormal_scores[i])) + + norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax) + for ax_i in ax_img: + ax_i.axes.xaxis.set_visible(False) + ax_i.axes.yaxis.set_visible(False) + ax_img[0].imshow(img) + ax_img[0].title.set_text('Image') + ax_img[1].imshow(gt, cmap='gray') + ax_img[1].title.set_text('GroundTruth') + ax = ax_img[2].imshow(heat_map, cmap='jet', norm=norm) + ax_img[2].imshow(img, cmap='gray', interpolation='none') + ax_img[2].imshow(heat_map, cmap='jet', alpha=0.5, interpolation='none') + ax_img[2].title.set_text('Predicted heat map') + ax_img[3].imshow(mask, cmap='gray') + ax_img[3].title.set_text('Predicted mask') + ax_img[4].imshow(vis_img) + ax_img[4].title.set_text('Segmentation result') + left = 0.92 + bottom = 0.15 + width = 0.015 + height = 1 - 2 * bottom + rect = [left, bottom, width, height] + cbar_ax = fig_img.add_axes(rect) + cb = plt.colorbar(ax, shrink=0.6, cax=cbar_ax, fraction=0.046) + cb.ax.tick_params(labelsize=8) + font = { + 'family': 'serif', + 'color': 'black', + 'weight': 'normal', + 'size': 8, + } + cb.set_label('Anomaly Score', fontdict=font) + + if ('.' in savepath.split('/')[-1]): + savepath_tmp = get_savepath(savepath, image_path, ext='.png') + else: + filename_tmp = image_path.split('/')[-1] + ext_tmp = '.' + filename_tmp.split('.')[-1] + filename_tmp = filename_tmp.replace(ext_tmp, '.png') + savepath_tmp = '%s/%s' % (savepath, filename_tmp) + logger.info(f'saved at : {savepath_tmp}') + fig_img.savefig(savepath_tmp, dpi=100) + plt.close() + +def infer_init_run(net, params, train_outputs, IMAGE_SIZE): + import numpy as np + dummy_image = np.random.rand(1, 3, 224, 224) * 255.0 # Scale between 0 and 255 + # Convert the dtype to float32 for efficiency + dummy_image = dummy_image.astype(np.float32) + logger.info(f"PaDiM initialization inference starts!") + if args.enable_optimization: + score = infer_optimized(net, params, train_outputs, dummy_image, IMAGE_SIZE, device, logger) + else: + score = infer(net, params, train_outputs, dummy_image, IMAGE_SIZE) + logger.info(f"PaDiM initialization inference finish!") + + +def train_from_image_or_video(net, params): + # training + if args.enable_optimization: + train_outputs = training_optimized(net, params, IMAGE_RESIZE, IMAGE_SIZE, KEEP_ASPECT, int(args.batch_size), args.train_dir, args.aug, args.aug_num, args.seed, logger) + else: + train_outputs = training(net, params, IMAGE_RESIZE, IMAGE_SIZE, KEEP_ASPECT, int(args.batch_size), args.train_dir, args.aug, args.aug_num, args.seed, logger) + # save learned distribution + if args.feat: + train_feat_file = args.feat + else: + train_dir = args.train_dir + train_feat_file = str(os.path.basename(train_dir))+"."+str(args.save_format) + + train_outputs=_save_training_flie(train_feat_file, args.save_format, train_outputs) + + return train_outputs + + +def load_gt_imgs(gt_type_dir): + gt_imgs = [] + for i_img in range(0, len(args.input)): + image_path = args.input[i_img] + gt_img = None + if gt_type_dir: + fname = os.path.splitext(os.path.basename(image_path))[0] + gt_fpath = os.path.join(gt_type_dir, fname + '_mask.png') + if os.path.exists(gt_fpath): + gt_img = load_image(gt_fpath) + gt_img = cv2.cvtColor(gt_img, cv2.COLOR_BGRA2RGB) + gt_img = preprocess(gt_img, IMAGE_RESIZE, mask=True, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) + if gt_img is not None: + gt_img = gt_img[0, [0]] + else: + gt_img = np.zeros((1, IMAGE_SIZE, IMAGE_SIZE)) + gt_imgs.append(gt_img) + return gt_imgs + + +def decide_threshold_from_gt_image(net, params, train_outputs, gt_imgs): + score_map = [] + for i_img in range(0, len(args.input)): + logger.info('from (%s) ' % (args.input[i_img])) + + image_path = args.input[i_img] + img = load_image(image_path) + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) + img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) + if args.enable_optimization: + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) + else: + dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) + + + score_map.append(dist_tmp) + + scores = normalize_scores(score_map, IMAGE_SIZE) + print("scores shape", scores.shape) + + threshold = decide_threshold(scores, gt_imgs) + + return threshold + +def infer_from_image(net, params, train_outputs, threshold, gt_imgs): + if len(args.input) == 0: + logger.error("Input file not found") + return + + test_imgs = [] + + score_map = [] + infer_init_run(net, params, train_outputs, IMAGE_SIZE) + for i_img in range(0, len(args.input)): + logger.info('from (%s) ' % (args.input[i_img])) + + image_path = args.input[i_img] + img = load_image(image_path) + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) + img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) + + test_imgs.append(img[0]) + + if args.benchmark: + logger.info('BENCHMARK mode') + total_time = 0 + if args.enable_optimization: + for i in range(args.benchmark_count): + start = int(round(time.time() * 1000)) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) + end = int(round(time.time() * 1000)) + logger.info(f'\tailia processing time {end - start} ms') + if i != 0: + total_time = total_time + (end - start) + logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') + else: + for i in range(args.benchmark_count): + start = int(round(time.time() * 1000)) + dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) + end = int(round(time.time() * 1000)) + logger.info(f'\tailia processing time {end - start} ms') + if i != 0: + total_time = total_time + (end - start) + logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') + if args.compare_optimization: + logger.info(f'\tResults of optimized and original code is the same: {np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger))}') + + + else: + if args.enable_optimization: + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) + else: + dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) + if args.compare_optimization: + logger.info('Results of optimized and original code is the same: '+ str(np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger)))) + + + score_map.append(dist_tmp) + print("score_map shape ", np.asarray(score_map).shape) + scores = normalize_scores(score_map, IMAGE_SIZE) + print("normalize_scores shape ", np.asarray(scores).shape) + print("IMAGE_SIZE shape ", IMAGE_SIZE) + anormal_scores = calculate_anormal_scores(score_map, IMAGE_SIZE) + print("IMAGE_SIZE shape ", np.asarray(anormal_scores).shape) + + # Plot gt image + plot_fig(args.input, test_imgs, scores, anormal_scores, gt_imgs, threshold, args.savepath) + + +def infer_from_video(net, params, train_outputs, threshold): + capture = webcamera_utils.get_capture(args.video) + if args.savepath != SAVE_IMAGE_PATH: + f_h = int(IMAGE_SIZE) + f_w = int(IMAGE_SIZE) * 3 + writer = webcamera_utils.get_writer(args.savepath, f_h, f_w) + else: + writer = None + + score_map = [] + + frame_shown = False + infer_init_run(net, params, train_outputs, IMAGE_SIZE) + if args.enable_optimization: + while(True): + ret, frame = capture.read() + if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret: + break + if frame_shown and cv2.getWindowProperty('frame', cv2.WND_PROP_VISIBLE) == 0: + break + + img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) + + dist_tmp = infer_optimized(net, params, train_outputs, img, device,logger) + + score_map.append(dist_tmp) + scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production + + heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) + frame = pack_visualize(heat_map, mask, vis_img, scores, IMAGE_SIZE) + + cv2.imshow('frame', frame) + frame_shown = True + + if writer is not None: + writer.write(frame) + else: + while(True): + ret, frame = capture.read() + if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret: + break + if frame_shown and cv2.getWindowProperty('frame', cv2.WND_PROP_VISIBLE) == 0: + break + + img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) + + dist_tmp = infer(net, params, train_outputs, img) + + score_map.append(dist_tmp) + scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production + + heat_map, mask, vis_img = visualize(denormalization(img[0]), scores[len(scores)-1], threshold) + frame = pack_visualize(heat_map, mask, vis_img, scores, IMAGE_SIZE) + + cv2.imshow('frame', frame) + frame_shown = True + + if writer is not None: + writer.write(frame) + + capture.release() + cv2.destroyAllWindows() + if writer is not None: + writer.release() + + +def train_and_infer(net, params): + timestart=time.time() + if args.feat: + train_outputs=_load_training_file(args.feat, args.save_format) + logger.info('loaded.') + else: + train_outputs = train_from_image_or_video(net, params) + + if args.threshold is None: + if args.video: + threshold = 0.5 + gt_imgs = None + logger.info('Please set threshold manually for video mdoe') + else: + gt_type_dir = args.gt_dir if args.gt_dir else None + gt_imgs = load_gt_imgs(gt_type_dir) + + threshold = decide_threshold_from_gt_image(net, params, train_outputs, gt_imgs) + logger.info('Optimal threshold: %f' % threshold) + else: + threshold = args.threshold + gt_imgs = None + + if args.video: + infer_from_video(net, params, train_outputs, threshold) + else: + infer_from_image(net, params, train_outputs, threshold, gt_imgs) + logger.info('Script finished successfully.') + +def _save_training_flie(train_feat_file, save_format, train_outputs): + if args.compare_optimization: + train_output_list.append(train_outputs) + + if not args.enable_optimization: + if save_format == "pkl" : + if train_feat_file==None: + train_feat_file = "train.pkl" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + with open(train_feat_file, 'wb') as f: + pickle.dump(train_outputs, f) + logger.info('Saved.') + elif save_format == "npy" : + filename=train_feat_file.split(".")[0].strip() + for i, output in enumerate(train_outputs): + + if train_feat_file==None: + train_feat_file = "train_output_"+str(i)+".npy" + else: + train_feat_file = filename+str("_")+str(i)+".npy" + + logger.info('Saving train set feature to: %s ...' % train_feat_file) + np.save(f"{filename}_{i}.npy", output) + logger.info('Saved.') + elif save_format == "pt": + if train_feat_file==None: + train_feat_file = "train.pt" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + torch.save(train_outputs, train_feat_file) + logger.info('Saved.') + + + else: + if save_format=="npy": + filename=train_feat_file.split(".")[0].strip() + for i, output in enumerate(train_outputs): + if train_feat_file==None: + train_feat_file = "train_output_"+str(i)+".npy" + else: + train_feat_file = filename+"_"+str(i)+".npy" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + np.save(f"{filename}_{i}.npy", output) + logger.info('Saved.') + + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + if save_format == "pkl" : + if train_feat_file==None: + train_feat_file = "trainOptimized.pkl" + logger.info('saving train set feature to: %s ...' % train_feat_file) + with open(train_feat_file, 'wb') as f: + pickle.dump(train_outputs, f) + logger.info('Saved.') + elif save_format == "pt": + if train_feat_file==None: + train_feat_file = "trainOptimized.pt" + logger.info('Saving train set feature to: %s ...' % train_feat_file) + torch.save(train_outputs, train_feat_file) + logger.info('Saved.') + + + if args.compare_optimization: + train_output_list.append(train_outputs) + + return train_outputs + +def _load_training_file(train_feat_file, save_format): + if _check_file_exists(train_feat_file, save_format): + if not train_feat_file: + train_feat_file = "trainOptimized."+save_format + else: + save_format=train_feat_file.split(".")[1].strip() + logger.info(f"Save format {save_format}") + + if args.enable_optimization: + + + if save_format== "pkl": + logger.info(f"Loading {train_feat_file}") + with open(train_feat_file, 'rb') as f: + train_outputs = pickle.load(f) + elif save_format == "npy": + train_outputs = [] + i = 0 + if train_feat_file: + train_feat_file=train_feat_file.split("_")[0].strip() + else: + train_feat_file="train_" + + while True: + try: + + logger.info(f"{train_feat_file}_{i}.npy") + train_outputs.append(np.load(f"{train_feat_file}_{i}.npy", allow_pickle=True)) + i += 1 + except FileNotFoundError: + break # Stop when there are no more files to load + train_outputs=[torch.from_numpy(train_outputs[0]).float().to(device), train_outputs[1], + torch.from_numpy(train_outputs[2]).float().to(device), train_outputs[3] ] + elif save_format == "pt": + logger.info(f"Loading {train_feat_file}") + train_outputs = torch.load(train_feat_file) + else: + + if save_format == "pkl": + train_feat_file = "train."+save_format + logger.info(f"Loading {train_feat_file}") + with open(train_feat_file, 'rb') as f: + train_outputs = pickle.load(f) + elif save_format == "npy": + train_outputs = [] + i = 0 + if train_feat_file: + train_feat_file=train_feat_file.split("_")[0].strip() + else: + train_feat_file="train_" + while True: + try: + + logger.info(f"{train_feat_file}_{i}.npy") + train_outputs.append(np.load(f"{train_feat_file}_{i}.npy", allow_pickle=True)) + i += 1 + except FileNotFoundError: + break # Stop when there are no more files to load + + elif save_format == "pt": + train_feat_file = "train."+save_format + logger.info(f"Loading {train_feat_file}") + train_outputs = torch.load(train_feat_file) + train_outputs_numpy = [] + if train_outputs[0] is torch.Tensor: + for item in train_outputs: + if isinstance(item, torch.Tensor): + train_outputs_numpy.append(item.cpu().numpy()) # Move to CPU and convert to NumPy + else: + train_outputs_numpy.append(item) + return train_outputs_numpy + return train_outputs + else: + logger.info(f"filename {train_feat_file} have not been found") + + +def _check_file_exists(train_feat_file, save_format): + if train_feat_file == None: + if save_format=="npy": + filename="train_output_0.npy" + elif args.enable_optimization: + filename="trainOptimized."+save_format + else: + filename="train."+save_format + if not os.path.isfile(filename): + logger.info(f"File {filename} does not exist. Unable to load the model") + else: + return os.path.isfile(train_feat_file) + + return os.path.isfile(filename) + + +def main(): + # model files check and download + starttime=time.time() + weight_path, model_path, params = get_params(args.arch) + check_and_download_models(weight_path, model_path, REMOTE_PATH) + + # create net instance + net = ailia.Net(model_path, weight_path, env_id=args.env_id) + + # check input + train_and_infer(net, params) + logger.info('Script finished execution time: '+str(int((time.time()-starttime)*1000))) + + +if __name__ == '__main__': + main() From e1c471124210952ddb486bb99d68d1c695bdfe18 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 8 Jul 2024 16:46:06 +0900 Subject: [PATCH 13/15] fixed data loading --- anomaly_detection/padim/padim.py | 45 +++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/anomaly_detection/padim/padim.py b/anomaly_detection/padim/padim.py index b1af42ee7..38ab09bc1 100644 --- a/anomaly_detection/padim/padim.py +++ b/anomaly_detection/padim/padim.py @@ -113,11 +113,14 @@ import torch if args.optimization_device=="cuda" and torch.cuda.is_available(): device = torch.device("cuda") + elif args.optimization_device=="mps" and torch.backends.mps.is_available(): device = torch.device("mps") else: device = torch.device("cpu") + + weights_torch=gaussian_kernel1d_torch(4, 0, int(4.0*float(4)+0.5), device).unsqueeze(0).unsqueeze(0).expand(1, 1, 33) logger.info("Torch device : " + str(device)) @@ -140,7 +143,11 @@ def plot_fig(file_list, test_imgs, scores, anormal_scores, gt_imgs, threshold, s gt = gt.transpose(1, 2, 0).squeeze() else: gt = np.zeros((1,1,1)) - heat_map, mask, vis_img = visualize(img, scores[i], threshold) + if args.enable_optimization: + heat_map, mask, vis_img = visualize(img, scores[i].squeeze(0).cpu().numpy(), threshold) + else: + heat_map, mask, vis_img = visualize(img, scores[i], threshold) + fig_img, ax_img = plt.subplots(1, 5, figsize=(12, 3)) fig_img.subplots_adjust(right=0.9) @@ -197,8 +204,9 @@ def infer_init_run(net, params, train_outputs, IMAGE_SIZE): # Convert the dtype to float32 for efficiency dummy_image = dummy_image.astype(np.float32) logger.info(f"PaDiM initialization inference starts!") + if args.enable_optimization: - score = infer_optimized(net, params, train_outputs, dummy_image, IMAGE_SIZE, device, logger) + score = infer_optimized(net, params, train_outputs, dummy_image, IMAGE_SIZE, device, logger, weights_torch) else: score = infer(net, params, train_outputs, dummy_image, IMAGE_SIZE) logger.info(f"PaDiM initialization inference finish!") @@ -252,16 +260,19 @@ def decide_threshold_from_gt_image(net, params, train_outputs, gt_imgs): img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT, crop_size = IMAGE_SIZE) if args.enable_optimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger, weights_torch) else: dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) score_map.append(dist_tmp) + if args.enable_optimization: + scores = normalize_scores_torch(score_map, IMAGE_SIZE) + threshold = decide_threshold(scores.cpu().numpy(), gt_imgs) - scores = normalize_scores(score_map, IMAGE_SIZE) - - threshold = decide_threshold(scores, gt_imgs) + else: + scores = normalize_scores_torch(score_map, IMAGE_SIZE) + threshold = decide_threshold(scores, gt_imgs) return threshold @@ -290,7 +301,7 @@ def infer_from_image(net, params, train_outputs, threshold, gt_imgs): if args.enable_optimization: for i in range(args.benchmark_count): start = int(round(time.time() * 1000)) - dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger, weights_torch) end = int(round(time.time() * 1000)) logger.info(f'\tailia processing time {end - start} ms') if i != 0: @@ -306,22 +317,27 @@ def infer_from_image(net, params, train_outputs, threshold, gt_imgs): total_time = total_time + (end - start) logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms') if args.compare_optimization: - logger.info(f'\tResults of optimized and original code is the same: {np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger))}') + logger.info(f'\tResults of optimized and original code is the same: {np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger, weights_torch))}') else: if args.enable_optimization: - dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, IMAGE_SIZE, device,logger, weights_torch) else: dist_tmp = infer(net, params, train_outputs, img, IMAGE_SIZE) if args.compare_optimization: - logger.info('Results of optimized and original code is the same: '+ str(np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger)))) + logger.info('Results of optimized and original code is the same: '+ str(np.allclose(infer(net, params, train_output_list[0], img, IMAGE_SIZE), infer_optimized(net, params, train_output_list[1], img, IMAGE_SIZE, device,logger, weights_torch)))) score_map.append(dist_tmp) + if args.enable_optimization: + scores = normalize_scores_torch(score_map, IMAGE_SIZE) + anormal_scores = calculate_anormal_scores_torch(score_map, IMAGE_SIZE) + + else: + scores = normalize_scores(score_map, IMAGE_SIZE) + anormal_scores = calculate_anormal_scores(score_map, IMAGE_SIZE) - scores = normalize_scores(score_map, IMAGE_SIZE) - anormal_scores = calculate_anormal_scores(score_map, IMAGE_SIZE) # Plot gt image plot_fig(args.input, test_imgs, scores, anormal_scores, gt_imgs, threshold, args.savepath) @@ -340,6 +356,7 @@ def infer_from_video(net, params, train_outputs, threshold): frame_shown = False infer_init_run(net, params, train_outputs, IMAGE_SIZE) + if args.enable_optimization: while(True): ret, frame = capture.read() @@ -351,7 +368,7 @@ def infer_from_video(net, params, train_outputs, threshold): img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) img = preprocess(img, IMAGE_RESIZE, keep_aspect=KEEP_ASPECT) - dist_tmp = infer_optimized(net, params, train_outputs, img, device,logger) + dist_tmp = infer_optimized(net, params, train_outputs, img, device,logger, weights_torch) score_map.append(dist_tmp) scores = normalize_scores(score_map) # min max is calculated dynamically, please set fixed min max value from calibration data for production @@ -505,6 +522,8 @@ def _load_training_file(train_feat_file, save_format): logger.info(f"Loading {train_feat_file}") with open(train_feat_file, 'rb') as f: train_outputs = pickle.load(f) + train_outputs=[train_outputs[0].to(device), train_outputs[1], + train_outputs[2].to(device), train_outputs[3] ] elif save_format == "npy": train_outputs = [] i = 0 From 92a576c7bc99ae2e6ed58b485e08ea1a1702ef32 Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 8 Jul 2024 16:58:11 +0900 Subject: [PATCH 14/15] fixed bugs --- anomaly_detection/padim/padim_gui.py | 2 ++ anomaly_detection/padim/padim_utils.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/anomaly_detection/padim/padim_gui.py b/anomaly_detection/padim/padim_gui.py index 6452c1df5..c9e0c05fc 100644 --- a/anomaly_detection/padim/padim_gui.py +++ b/anomaly_detection/padim/padim_gui.py @@ -365,6 +365,8 @@ def test_button_clicked(): logger.info(f"Loading {train_feat_file}") with open(train_feat_file, 'rb') as f: train_outputs = pickle.load(f) + train_outputs=[train_outputs[0].to(device), train_outputs[1], + train_outputs[2].to(device), train_outputs[3] ] elif save_format == "npy": train_outputs = [] i = 0 diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index 76053b962..606ec5cd7 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -562,9 +562,9 @@ def gaussian_kernel1d_torch(sigma, order, radius, device): """ if order < 0: raise ValueError('order must be non-negative') - exponent_range = torch.arange(order + 1, device=device) + #exponent_range = torch.arange(order + 1, device=device) sigma2 = sigma * sigma - x = torch.arange(-radius, radius + 1, dtype=torch.float64, device=device) + x = torch.arange(-radius, radius + 1, dtype=torch.float32, device=device) phi_x = torch.exp(-0.5 / sigma2 * x ** 2) phi_x = phi_x / phi_x.sum() @@ -573,14 +573,14 @@ def gaussian_kernel1d_torch(sigma, order, radius, device): def gausian_filter_torch(input, weights, output=None, mode='constant', cval=0.0, origin=0): - input=input.permute(2, 0,1 ).to(dtype=torch.float64) - input_padded=F.pad(input, pad=(16, 16), mode='reflect') - output1=torch.nn.functional.conv1d(input_padded, weights.to(dtype=torch.float64) ) #torch.Size([448, 1, 448]) + input=input.permute(2, 0,1 ) + input=F.pad(input, pad=(16, 16), mode='reflect') + input=F.conv1d(input, weights ) - input2=output1.permute(2, 1,0 ) - input_padded2=F.pad(input2, pad=(16, 16), mode='reflect', ) - output2=torch.nn.functional.conv1d(input_padded2, weights.to(dtype=torch.float64) ).permute(1, 0,2 ) - return output2 + input=input.permute(2, 1,0 ) + input=F.pad(input, pad=(16, 16), mode='reflect', ) + input=F.conv1d(input, weights).permute(1, 0,2 ) + return input def normalize_scores(score_map, crop_size, roi_img = None): From 44786a2b8ebfb908f04e105c18f9472e895b272c Mon Sep 17 00:00:00 2001 From: YToleubay Date: Mon, 8 Jul 2024 18:00:15 +0900 Subject: [PATCH 15/15] reduced memory consumption --- anomaly_detection/padim/padim_utils.py | 35 ++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/anomaly_detection/padim/padim_utils.py b/anomaly_detection/padim/padim_utils.py index 606ec5cd7..1fef26520 100644 --- a/anomaly_detection/padim/padim_utils.py +++ b/anomaly_detection/padim/padim_utils.py @@ -519,9 +519,29 @@ def infer_optimized(net, params, train_outputs, img, crop_size, device, logger, embedding_vectors = embedding_vectors.view(B, C, H * W) # calculate distance matrix - mean_vectors = train_outputs[0] - inv_cov_matrices = train_outputs[2] - samples = embedding_vectors[0] + #mean_vectors = train_outputs[0] + #inv_cov_matrices = train_outputs[2] + #samples = embedding_vectors[0] + # + if str(device) not in str(train_outputs[0].device): + logger.info(f"Changing device from {train_outputs[0].device} to {device}") + train_outputs[0]=train_outputs[0].to(device) + embedding_vectors[0]=embedding_vectors[0].to(device) + train_outputs[2]=train_outputs[2].to(device) + # Step 1: Compute the difference between each sample and its corresponding mean + dist_tmp = embedding_vectors[0] - train_outputs[0] + # Step 2: Apply the inverse covariance matrix + transformed_differences = torch.einsum('ijk,jk->ik', train_outputs[2], dist_tmp) + + # Step 3: Compute the Mahalanobis distance + dist_tmp = torch.sqrt(torch.sum(dist_tmp * transformed_differences, dim=0)) + transformed_differences=0 + # upsample + dist_tmp=F.interpolate(dist_tmp.view(1, -1).view( H, W).unsqueeze(0).unsqueeze(0), + size=(crop_size, crop_size), mode='bilinear', align_corners=False).squeeze(0) + dist_tmp=gausian_filter_torch(dist_tmp, weights_torch, mode='reflect') + + """ if str(device) not in str(mean_vectors.device): logger.info(f"Changing device from {mean_vectors.device} to {device}") mean_vectors=mean_vectors.to(device) @@ -534,8 +554,9 @@ def infer_optimized(net, params, train_outputs, img, crop_size, device, logger, # Step 3: Compute the Mahalanobis distance dist_tmp = torch.sqrt(torch.sum(differences * transformed_differences, dim=0)) # upsample + """ - dist_tmp = dist_tmp.view(1, -1).view( H, W) + #dist_tmp = dist_tmp.view(1, -1).view( H, W) """ dist_tmp = dist_tmp.view(1, -1).view( H, W).cpu().numpy() print("crop_size ", crop_size) @@ -549,9 +570,9 @@ def infer_optimized(net, params, train_outputs, img, crop_size, device, logger, print("Shape after gausian filter: ", dist_tmp.shape) """ - dist_tmp=F.interpolate(dist_tmp.unsqueeze(0).unsqueeze(0), - size=(crop_size, crop_size), mode='bilinear', align_corners=False).squeeze(0) - dist_tmp=gausian_filter_torch(dist_tmp, weights_torch, mode='reflect') + #dist_tmp=F.interpolate(dist_tmp.unsqueeze(0).unsqueeze(0), + #size=(crop_size, crop_size), mode='bilinear', align_corners=False).squeeze(0) + #dist_tmp=gausian_filter_torch(dist_tmp, weights_torch, mode='reflect') return dist_tmp