Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

求救:CPU模式下推理正确,GPU(CUDA驱动)推理异常 #3159

Open
HEMOURENNN opened this issue Jan 9, 2025 · 2 comments
Open
Labels
bug Something isn't working CUDA

Comments

@HEMOURENNN
Copy link

平台(如果交叉编译请再附上交叉编译目标平台): Win10

Platform(Include target platform as well if cross-compiling):

Github版本:3.0.3

Github Version: 8180b48

直接下载ZIP包请提供下载日期以及压缩包注释里的git版本(可通过7z l zip包路径命令并在输出信息中搜索Comment 获得,形如Comment = bc80b11110cd440aacdabbf59658d630527a7f2b)。 git clone请提供 git commit 第一行的commit id

Provide date (or better yet, git revision from the comment section of the zip. Obtainable using 7z l PATH/TO/ZIP and search for Comment in the output) if downloading source as zip; otherwise provide the first commit id from the output of git log.

编译方式:

Compiling Method

请在这里粘贴cmake参数或使用的cmake脚本路径以及完整输出
cmake  -G "Ninja"  -DCMAKE_BUILD_TYPE:STRING="Release"  -DMNN_BUILD_DEMO:BOOL="True" -DMNN_CPU_WEIGHT_DEQUANT_GEMM:BOOL="True" -DMNN_LOW_MEMORY:BOOL="True" -DMNN_BUILD_OPENCV:BOOL="True" -DMNN_CUDA:BOOL="True" -DMNN_WIN_RUNTIME_MT:BOOL="True"

具体代码:

cv::Mat TransImageToTensorByCV(const char* path, Tensor* tensor) {
	// Load the image at `path`, letterbox it to the tensor's HxW, normalize
	// with ImageNet mean/std, and upload the result into `tensor`
	// (NHWC / TENSORFLOW layout, float32).
	// Returns the decoded RGB image, or an empty cv::Mat on read failure.
	// FIX: the original `new Tensor` was never deleted — leaked on both the
	// success and the error path. A unique_ptr releases it on every exit.
	std::unique_ptr<Tensor> inputTensor(new Tensor(tensor, Tensor::TENSORFLOW));
	auto targetChannel = inputTensor->channel();
	auto targetHeight  = inputTensor->height();
	auto targetWidth   = inputTensor->width();
	MNN_PRINT("input: w:%d , h:%d, bpp: %d\n", targetWidth, targetHeight, targetChannel);

	// Read the image.
	// FIX: the original ran cvtColor() BEFORE the empty() check, which throws
	// on an unreadable path; validate first, convert after.
	auto image = cv::imread(path, cv::IMREAD_COLOR);
	if (image.empty()) {
		MNN_ERROR("Can't open image %s\n", path);
		return cv::Mat();
	}
	// OpenCV decodes as BGR; the model expects RGB.
	cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
	MNN_PRINT("origin size: %d, %d\n", image.cols, image.rows);

	// Zero-filled canvas the resized image is centered into (letterboxing).
	cv::Mat dstUintImage(targetHeight, targetWidth, CV_8UC3, cv::Scalar(0, 0, 0));

	// Uniform scale that fits the whole image inside the target while
	// preserving aspect ratio.
	float scale = std::min(static_cast<float>(targetWidth) / image.cols,
	                       static_cast<float>(targetHeight) / image.rows);
	cv::Mat resizedImage;
	cv::resize(image, resizedImage, cv::Size(), scale, scale, cv::INTER_CUBIC);

	// Centering offsets for the letterbox padding.
	int startX = (targetWidth - resizedImage.cols) / 2;
	int startY = (targetHeight - resizedImage.rows) / 2;
	resizedImage.copyTo(dstUintImage(cv::Rect(startX, startY, resizedImage.cols, resizedImage.rows)));

	// uint8 [0,255] -> float32 [0,1]
	cv::Mat dstFloatImage;
	dstUintImage.convertTo(dstFloatImage, CV_32FC3, 1.0 / 255.0);

	// Standardize with the usual ImageNet per-channel mean / std.
	float means[3]   = { 0.485f, 0.456f, 0.406f };
	float normals[3] = { 0.229f, 0.224f, 0.225f };
	dstFloatImage -= cv::Scalar(means[0], means[1], means[2]);
	dstFloatImage /= cv::Scalar(normals[0], normals[1], normals[2]);

	// dstFloatImage was freshly allocated by convertTo, so it is continuous
	// and can be copied as one flat HWC float buffer into the host tensor,
	// then pushed to the backend (possibly GPU) tensor.
	::memcpy(inputTensor->host<float>(), dstFloatImage.data,
	         targetHeight * targetWidth * targetChannel * sizeof(float));
	tensor->copyFromHostTensor(inputTensor.get());

	return image;
}

void ProcBBoxResult(Tensor* bbox, int topkBoxNum, cv::Mat oriSearchImage) {
	// Copy the bbox output off the (possibly GPU) backend into a host-side
	// tensor and print it. Layout is presumably (cx, cy, w, h) per box —
	// stride(0) == 4 per the author's note; confirm against the model.
	// `topkBoxNum` and `oriSearchImage` are currently unused; they are kept
	// so the caller-visible signature stays unchanged.
	(void)topkBoxNum;
	(void)oriSearchImage;

	std::shared_ptr<Tensor> bboxTensor(new Tensor(bbox, Tensor::TENSORFLOW));
	bbox->copyToHostTensor(bboxTensor.get());

	// FIX: removed dead locals from the original (stride, topkBboxs,
	// topkTargets, bboxValues) — all computed but never read.
	bboxTensor->print();
}

int main(int argc, const char* argv[]) {
	if (argc < 3) {
		MNN_PRINT("Usage: ./pictureRecognition.out model.mnn input0.jpg input1.jpg input2.jpg ... \n");
		return 0;
	}
	// 初始化网络
	std::shared_ptr<Interpreter> net(Interpreter::createFromFile(argv[1]), Interpreter::destroy);
	net->setCacheFile(".cachefile");
	net->setSessionMode(Interpreter::Session_Backend_Auto);
	net->setSessionHint(Interpreter::MAX_TUNING_NUMBER, 5);
	ScheduleConfig config;
	config.type = MNN_FORWARD_CUDA;



	if (config.type == MNN_FORWARD_CUDA) {
		BackendConfig bnconfig;

		// GPU设备选择
		MNNDeviceContext gpuDeviceConfig;
		gpuDeviceConfig.deviceId = 0;
		bnconfig.sharedContext = &gpuDeviceConfig;

		//
		BackendConfig::PrecisionMode precision = MNN::BackendConfig::Precision_High;
		bnconfig.precision = precision;

		config.backendConfig = &bnconfig;
	}

	auto session = net->createSession(config);
	float memoryUsage = 0.0f;
	net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
	float flops = 0.0f;
	net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
	int backendType[2];
	net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
	MNN_PRINT("Session Info: memory use %f MB, flops is %f M, backendType is %d, batch size = %d\n", memoryUsage, flops, backendType[0], argc - 2);

	// 初始化输入
	auto inputTemp = net->getSessionInput(session, "input.1");
	auto inputSearch = net->getSessionInput(session, "x.1");
	Tensor* input[] = { inputTemp ,inputSearch };
	cv::Mat inputImage[2];

	// 输入图像预处理
	for (int i = 0; i < 2; i++) {
		// TransImageToTensor(argv[i + 2], input[i], batch);
		inputImage[i] = TransImageToTensorByCV(argv[i + 2], input[i]);
	}

	// 初始化输出
	auto boxOutput = net->getSessionOutput(session, "1380");
	auto offsetMapOutput = net->getSessionOutput(session, "offset_map");
	auto scoreMapOutput = net->getSessionOutput(session, "score_map_ctr.3");
	auto sizeMapOutput = net->getSessionOutput(session, "size_map");

	Tensor* output[] = { boxOutput ,offsetMapOutput, scoreMapOutput ,sizeMapOutput };
	// 开始推理
	net->runSession(session);
	for (int i = 0; i < 4; i++) {
		auto dimType = output[i]->getDimensionType();
		if (output[i]->getType().code != halide_type_float) {
			dimType = Tensor::TENSORFLOW;
		}
	}

	// 处理bbox结果
	BBOX_RESULT bboxResult = ProcBBoxResult(boxOutput, 1, inputImage[1]);
	net->updateCacheFile(session);
	return 0;
}

在GPU(CUDA)和CPU下进行了多组不同数据的实验后发现:
GPU模式下ProcBBoxResult函数中bboxTensor->print();打印的第二个值永远都是错误的,而且值很大,其他的三个值都是正确的。下面列出2组数据:
数据①
CPU:
image
GPU:
image
数据②
CPU:
image
GPU:
image

@jxt1234 jxt1234 added CUDA bug Something isn't working labels Jan 9, 2025
@jxt1234
Copy link
Collaborator

jxt1234 commented Jan 9, 2025

原始模型麻烦发一下看看,另外用 backendTest.out 可以测试出问题么?

@HEMOURENNN
Copy link
Author

HEMOURENNN commented Jan 13, 2025

原始模型麻烦发一下看看,另外用 backendTest.out 可以测试出问题么?

大佬在吗?
我的模型是双输入的,大小有300+M。github好像上传不了,怎么发给你呢?
我目前也在尝试用backendTest.out进行测试

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working CUDA
Projects
None yet
Development

No branches or pull requests

2 participants