Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scale=0.5或者0.25时报错 #385

Open
ZXMMD opened this issue Dec 31, 2024 · 4 comments
Open

scale=0.5或者0.25时报错 #385

ZXMMD opened this issue Dec 31, 2024 · 4 comments

Comments

@ZXMMD
Copy link

ZXMMD commented Dec 31, 2024

import os
import sys
import cv2
import math
import glob
import torch
import argparse
import warnings
import numpy as np
from time import time
from tqdm import tqdm
from padder import InputPadder # 填充到32的整数倍
from model.pytorch_msssim import ssim_matlab
from model.RIFE import Model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = Model()
model.load_model('train_log')
model.eval()
model.device()

def getXVFI(dir, multiple=8, t_step_size=32):
    """ make [I0,I1,It,t,scene_folder] """
    testPath = []
    t = np.linspace((1 / multiple), (1 - (1 / multiple)), (multiple - 1))
    for type_folder in sorted(glob.glob(os.path.join(dir, '*', ''))):
        for scene_folder in sorted(glob.glob(os.path.join(type_folder, '*', ''))):
            frame_folder = sorted(glob.glob(scene_folder + '*.png'))
            for idx in range(0, len(frame_folder), t_step_size):
                if idx == len(frame_folder) - 1:
                    break
                for mul in range(multiple - 1):
                    I0I1It_paths = []
                    I0I1It_paths.append(frame_folder[idx])
                    I0I1It_paths.append(frame_folder[idx + t_step_size])
                    I0I1It_paths.append(frame_folder[idx + int((t_step_size // multiple) * (mul + 1))])
                    I0I1It_paths.append(t[mul])
                    testPath.append(I0I1It_paths)
    return testPath


def _recursive_generator(model, frame1, frame2, scale, num_recursions):
    if num_recursions == 0:
        yield frame1
    else:
        with torch.no_grad():
            mid_frame = model.inference(frame1, frame2, scale=scale, TTA=True)
            yield from _recursive_generator(model, frame1, mid_frame, scale, num_recursions - 1)
            yield from _recursive_generator(model, mid_frame, frame2, scale, num_recursions - 1)


def test_XTEST(mode, path, model, device, save_result, save_dir):
    if save_result:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    listFiles = getXVFI(path, multiple=8, t_step_size=32)
    count = 0
    input_frames = [item[:2] for item in listFiles][::7]
    gts = [item[2:] for item in listFiles]
    fltPsnr, fltSsim = [], []
    count = 0
    for intFrame in tqdm(input_frames):
        npyOne = np.array(cv2.imread(intFrame[0])).astype(np.float32) * (1.0 / 255.0)
        npyTwo = np.array(cv2.imread(intFrame[1])).astype(np.float32) * (1.0 / 255.0)
        gtFrames = gts[count*7:(count+1)*7]
        count += 1

        if mode == '2K':
            scale = 0.5
            npyOne = cv2.resize(src=npyOne, dsize=(2048, 1080), fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)
            npyTwo = cv2.resize(src=npyTwo, dsize=(2048, 1080), fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)
        elif mode == '4K':
            scale = 0.25

        tenOne = torch.FloatTensor(np.ascontiguousarray(npyOne.transpose(2, 0, 1)[None, :, :, :])).to(device)
        tenTwo = torch.FloatTensor(np.ascontiguousarray(npyTwo.transpose(2, 0, 1)[None, :, :, :])).to(device)

        padder = InputPadder(tenOne.shape, 32)
        tenOne, tenTwo = padder.pad(tenOne, tenTwo)

        frames = list(_recursive_generator(model, tenOne, tenTwo, scale, 3))

        fltPsnr_single_testcase, fltSsim_single_testcase = [], []
        frames = frames[1:] 
        i = 0
        for frame in frames:
            tenEstimate = padder.unpad(frame[0])
            npyEstimate = (tenEstimate.detach().cpu().numpy().transpose(1, 2, 0) * 255.0).clip(0.0, 255.0).round().astype(np.uint8)

            if save_result:
                output_filename = os.path.join(save_dir, f"{(count-1) * 7 + i:04d}.png")  # 四位数字命名
                cv2.imwrite(output_filename, npyEstimate)

            tenEstimate = torch.FloatTensor(npyEstimate.transpose(2, 0, 1)[None, :, :, :]).to(device) / 255.0
            npyTruth = np.array(cv2.imread(gtFrames[i][0])).astype(np.float32) * (1.0 / 255.0)
            
            npyTruth = cv2.resize(src=npyTruth, dsize=(2048, 1080), fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)
            if mode == '2K':
                npyTruth = cv2.resize(src=npyTruth, dsize=(2048, 1080), fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)
            tenGT = torch.FloatTensor(np.ascontiguousarray(npyTruth.transpose(2, 0, 1)[None, :, :, :])).to(device)

            fltPsnr_single_testcase.append(-10 * math.log10(torch.mean((tenEstimate - tenGT) * (tenEstimate - tenGT)).cpu().data))
            fltSsim_single_testcase.append(ssim_matlab(tenEstimate, tenGT).detach().cpu().numpy())
            i = i + 1
        fltPsnr.append(np.mean(fltPsnr_single_testcase))
        fltSsim.append(np.mean(fltSsim_single_testcase))
    print('PSNR: %.2f, SSIM: %.4f' % (np.mean(fltPsnr), np.mean(fltSsim)))


path ='/data/data/xtest'
save_result=True
mode = '4K'
save_dir = './XTEST_4K'
test_XTEST(mode, path, model, device, save_result, save_dir)

作者您好,我使用上述代码测试RIFE在X-TEST-L(2K以及4K)数据集上的性能,当设置scale为1时,可以正常运行。当设置scale为0.25或者0.25时,就会报错:

(/data/env/myvfi) (RIFE) (base) lzj7@ins-7df6eojsifuykig9:/data/MyVFI/ECCV2022-RIFE/benchmark$ python XTEST_L.py 
  7%|████████████▏                                                                                                                                                                         | 1/15 [00:05<01:20,  5.77s/it]
Traceback (most recent call last):
  File "/data/MyVFI/ECCV2022-RIFE/benchmark/XTEST_L.py", line 99, in <module>
    test_XTEST(mode, path, model, device, save_result, save_dir)
  File "/data/MyVFI/ECCV2022-RIFE/benchmark/XTEST_L.py", line 80, in test_XTEST
    tenEstimate = model.inference(tenOne, tenTwo, scale=scale, TTA=True)[0]
  File "/data/MyVFI/ECCV2022-RIFE/model/RIFE.py", line 60, in inference
    flow, mask, merged, flow_teacher, merged_teacher, loss_distill = self.flownet(imgs, scale_list, timestep=timestep)
  File "/data/env/myvfi/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/MyVFI/ECCV2022-RIFE/model/IFNet.py", line 77, in forward
    flow_d, mask_d = stu[i](torch.cat((img0, img1, warped_img0, warped_img1, mask), 1), flow, scale=scale[i])
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2176 but got size 2304 for tensor number 2 in the list.

奇怪的是,第一组数据成功跑出了结果,到第二组测试数据就开始报错了。使用上述代码对XTEST以及Xiph数据集测试时,只要scale设为0.25或者0.5,就会遇到上述问题。

经过分析,是IFNet.py中flow的shape发生了变化:
image

@hzwer
Copy link
Owner

hzwer commented Jan 2, 2025

Hi
你可以把 InputPadder 中的 32 改成 128 / 256 试一下

@ZXMMD
Copy link
Author

ZXMMD commented Jan 2, 2025

Hi 你可以把 InputPadder 中的 32 改成 128 / 256 试一下

我试了一下改成128或者256,代码依然会报错。具体来说,在对XTEST-L-2K(scale=0.5)进行测试时,当设置padder = InputPadder(tenOne.shape, 256),测试到第5组数据报错:
image

当设置padder = InputPadder(tenOne.shape,128),测试到第4组数据报错:
image

当设置padder = InputPadder(tenOne.shape,64),测试到第3组数据报错:
image

貌似padder = InputPadder(tenOne.shape,divisor)中的divisor设置的越大,就能测试更多组数据。
这是我使用的InputPadder的代码:padder

@hzwer
Copy link
Owner

hzwer commented Jan 2, 2025

按道理原始 RIFE 需要 4*8=32 取整 padding
scale=0.5 和 0.25 分别需要 shape 取整到 64 和 128
你可以在 mid_frame = model.inference(frame1, frame2, scale=scale, TTA=True) 这一句之前,确认 frame1, frame2 的 shape 是否正确

@ZXMMD
Copy link
Author

ZXMMD commented Jan 3, 2025

image
(scale=0.5,padder = InputPadder(tenOne.shape, 64))frame1和frame2的shape确实被填充到了64的整数倍

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants