From a2c7f79b5ad5da04303188892aa99a35731a0a20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=A8=E5=A5=87=28yann=20qi=29?= <51905299+yannqi@users.noreply.github.com>
Date: Thu, 17 Nov 2022 20:59:58 +0800
Subject: [PATCH] Bug fix. Change softmax dim.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In line 64, change the softmax dim from 2 to 1. The current line is:

`probs = F.softmax(self.scale * probs, dim=2)# batch x k x hw`

Here the input has shape [batch_size, num_class, fh*fw], and the softmax is taken over
dim 2, so each class's attention map sums to one over the spatial positions (fh*fw).
In my opinion, the softmax should instead be taken over dim 1, so that the probabilities
over num_class sum to one at each spatial position. The corrected line is:

`probs = F.softmax(self.scale * probs, dim=1)# batch x num_class x hw`
---
 lib/models/seg_hrnet_ocr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/models/seg_hrnet_ocr.py b/lib/models/seg_hrnet_ocr.py
index da19d236..487c8ec5 100644
--- a/lib/models/seg_hrnet_ocr.py
+++ b/lib/models/seg_hrnet_ocr.py
@@ -61,7 +61,7 @@ def forward(self, feats, probs):
         probs = probs.view(batch_size, c, -1)
         feats = feats.view(batch_size, feats.size(1), -1)
         feats = feats.permute(0, 2, 1) # batch x hw x c
-        probs = F.softmax(self.scale * probs, dim=2)# batch x k x hw
+        probs = F.softmax(self.scale * probs, dim=1)# batch x k x hw
         ocr_context = torch.matmul(probs, feats)\
             .permute(0, 2, 1).unsqueeze(3)# batch x k x c
         return ocr_context
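
As a quick illustration of the difference (a minimal standalone sketch with arbitrary example shapes, not part of the patch), the choice of softmax dim determines which axis of a `batch x num_class x hw` tensor sums to one:

```python
import torch
import torch.nn.functional as F

# Illustrative shapes only: batch of 2, 3 classes, 4 spatial positions (h*w).
probs = torch.randn(2, 3, 4)  # batch x num_class x hw

# dim=2: each class map is normalized over the hw spatial positions.
over_space = F.softmax(probs, dim=2)
print(over_space.sum(dim=2))  # all ones, shape: batch x num_class

# dim=1: the class scores are normalized at each spatial position.
over_classes = F.softmax(probs, dim=1)
print(over_classes.sum(dim=1))  # all ones, shape: batch x hw
```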