Add SIFT and ALIKED extractors + weights (#75)
Release of SIFT+LightGlue and ALIKED+LightGlue weights

#5, #51

---------

Co-authored-by: Paul-Edouard Sarlin <[email protected]>
Phil26AT and sarlinpe authored Oct 19, 2023

1 parent 7426bc7 commit 29f3e44
Showing 8 changed files with 1,057 additions and 70 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -26,8 +26,8 @@

This repository hosts the inference code of LightGlue, a lightweight feature matcher with high accuracy and blazing fast inference. It takes as input a set of keypoints and descriptors for each image and returns the indices of corresponding points. The architecture is based on adaptive pruning techniques, in both network width and depth - [check out the paper for more details](https://arxiv.org/pdf/2306.13643.pdf).

We release pretrained weights of LightGlue with [SuperPoint](https://arxiv.org/abs/1712.07629) and [DISK](https://arxiv.org/abs/2006.13566) local features.
The training and evaluation code will be released in July in a separate repo. To be notified, subscribe to [issue #6](https://github.com/cvg/LightGlue/issues/6).
We release pretrained weights of LightGlue with [SuperPoint](https://arxiv.org/abs/1712.07629), [DISK](https://arxiv.org/abs/2006.13566), [ALIKED](https://arxiv.org/abs/2304.03608) and [SIFT](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf) local features.
The training and evaluation code can be found in our training library [glue-factory](https://github.com/cvg/glue-factory/).

## Installation and demo [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb)

@@ -43,14 +43,14 @@ We provide a [demo notebook](demo.ipynb) which shows how to perform feature extr
Here is a minimal script to match two images:

```python
from lightglue import LightGlue, SuperPoint, DISK
from lightglue import LightGlue, SuperPoint, DISK, SIFT, ALIKED
from lightglue.utils import load_image, rbd

# SuperPoint+LightGlue
extractor = SuperPoint(max_num_keypoints=2048).eval().cuda() # load the extractor
matcher = LightGlue(features='superpoint').eval().cuda() # load the matcher

# or DISK+LightGlue
# or DISK+LightGlue, ALIKED+LightGlue or SIFT+LightGlue
extractor = DISK(max_num_keypoints=2048).eval().cuda() # load the extractor
matcher = LightGlue(features='disk').eval().cuda() # load the matcher
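
# or SIFT+LightGlue (a sketch; ALIKED is analogous with features='aliked')
extractor = SIFT(max_num_keypoints=2048).eval().cuda()  # load the extractor
matcher = LightGlue(features='sift').eval().cuda()  # load the matcher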

@@ -177,4 +177,4 @@ If you use any ideas from the paper or code from this repo, please consider citi


## License
The pre-trained weights of LightGlue and the code provided in this repository are released under the [Apache-2.0 license](./LICENSE). [DISK](https://github.com/cvlab-epfl/disk) follows this license as well, but SuperPoint follows [a different, restrictive license](https://github.com/magicleap/SuperPointPretrainedNetwork/blob/master/LICENSE) (this includes its pre-trained weights and its [inference file](./lightglue/superpoint.py)).
The pre-trained weights of LightGlue and the code provided in this repository are released under the [Apache-2.0 license](./LICENSE). [DISK](https://github.com/cvlab-epfl/disk) follows this license as well, but SuperPoint follows [a different, restrictive license](https://github.com/magicleap/SuperPointPretrainedNetwork/blob/master/LICENSE) (this includes its pre-trained weights and its [inference file](./lightglue/superpoint.py)). [ALIKED](https://github.com/Shiaoming/ALIKED) was published under a BSD-3-Clause license.
2 changes: 2 additions & 0 deletions lightglue/__init__.py
@@ -1,4 +1,6 @@
from .aliked import ALIKED # noqa
from .disk import DISK # noqa
from .lightglue import LightGlue # noqa
from .sift import SIFT # noqa
from .superpoint import SuperPoint # noqa
from .utils import match_pair # noqa
758 changes: 758 additions & 0 deletions lightglue/aliked.py

Large diffs are not rendered by default.

26 changes: 5 additions & 21 deletions lightglue/disk.py
@@ -1,13 +1,10 @@
from types import SimpleNamespace

import kornia
import torch
import torch.nn as nn

from .utils import ImagePreprocessor
from .utils import Extractor


class DISK(nn.Module):
class DISK(Extractor):
default_conf = {
"weights": "depth",
"max_num_keypoints": None,
@@ -18,24 +15,23 @@ class DISK(nn.Module):
}

preprocess_conf = {
**ImagePreprocessor.default_conf,
"resize": 1024,
"grayscale": False,
}

required_data_keys = ["image"]

def __init__(self, **conf) -> None:
super().__init__()
self.conf = {**self.default_conf, **conf}
self.conf = SimpleNamespace(**self.conf)
super().__init__(**conf) # Update with default configuration.
self.model = kornia.feature.DISK.from_pretrained(self.conf.weights)

def forward(self, data: dict) -> dict:
"""Compute keypoints, scores, descriptors for image"""
for key in self.required_data_keys:
assert key in data, f"Missing key {key} in data"
image = data["image"]
if image.shape[1] == 1:
image = kornia.color.grayscale_to_rgb(image)
features = self.model(
image,
n=self.conf.max_num_keypoints,
@@ -57,15 +53,3 @@ def forward(self, data: dict) -> dict:
"keypoint_scores": scores.to(image).contiguous(),
"descriptors": descriptors.to(image).contiguous(),
}

def extract(self, img: torch.Tensor, **conf) -> dict:
"""Perform extraction with online resizing"""
if img.dim() == 3:
img = img[None] # add batch dim
assert img.dim() == 4 and img.shape[0] == 1
shape = img.shape[-2:][::-1]
img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
feats = self.forward({"image": img})
feats["image_size"] = torch.tensor(shape)[None].to(img).float()
feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
return feats
45 changes: 37 additions & 8 deletions lightglue/lightglue.py
@@ -314,6 +314,7 @@ class LightGlue(nn.Module):
"name": "lightglue", # just for interfacing
"input_dim": 256, # input descriptor dimension (autoselected from weights)
"descriptor_dim": 256,
"add_scale_ori": False,
"n_layers": 9,
"num_heads": 4,
"flash": True, # enable FlashAttention if available.
@@ -339,25 +340,46 @@ class LightGlue(nn.Module):
url = "https://github.com/cvg/LightGlue/releases/download/{}/{}_lightglue.pth"

features = {
"superpoint": ("superpoint_lightglue", 256),
"disk": ("disk_lightglue", 128),
"superpoint": {
"weights": "superpoint_lightglue",
"input_dim": 256,
},
"disk": {
"weights": "disk_lightglue",
"input_dim": 128,
},
"aliked": {
"weights": "aliked_lightglue",
"input_dim": 128,
},
"sift": {
"weights": "sift_lightglue",
"input_dim": 128,
"add_scale_ori": True,
},
}

def __init__(self, features="superpoint", **conf) -> None:
super().__init__()
self.conf = {**self.default_conf, **conf}
self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
if features is not None:
assert features in list(self.features.keys())
self.conf["weights"], self.conf["input_dim"] = self.features[features]
self.conf = conf = SimpleNamespace(**self.conf)
if features not in self.features:
raise ValueError(
f"Unsupported features: {features} not in "
f"{{{','.join(self.features)}}}"
)
for k, v in self.features[features].items():
setattr(conf, k, v)

if conf.input_dim != conf.descriptor_dim:
self.input_proj = nn.Linear(conf.input_dim, conf.descriptor_dim, bias=True)
else:
self.input_proj = nn.Identity()

head_dim = conf.descriptor_dim // conf.num_heads
self.posenc = LearnableFourierPositionalEncoding(2, head_dim, head_dim)
self.posenc = LearnableFourierPositionalEncoding(
2 + 2 * self.conf.add_scale_ori, head_dim, head_dim
)

h, n, d = conf.num_heads, conf.n_layers, conf.descriptor_dim

@@ -378,7 +400,7 @@ def __init__(self, features="superpoint", **conf) -> None:

state_dict = None
if features is not None:
fname = f"{conf.weights}_{self.version}.pth".replace(".", "-")
fname = f"{conf.weights}_{self.version.replace('.', '-')}.pth"
state_dict = torch.hub.load_state_dict_from_url(
self.url.format(self.version, features), file_name=fname
)
@@ -452,6 +474,13 @@ def _forward(self, data: dict) -> dict:
kpts0 = normalize_keypoints(kpts0, size0).clone()
kpts1 = normalize_keypoints(kpts1, size1).clone()

if self.conf.add_scale_ori:
kpts0 = torch.cat(
[kpts0] + [data0[k].unsqueeze(-1) for k in ("scales", "oris")], -1
)
kpts1 = torch.cat(
[kpts1] + [data1[k].unsqueeze(-1) for k in ("scales", "oris")], -1
)
desc0 = data0["descriptors"].detach().contiguous()
desc1 = data1["descriptors"].detach().contiguous()
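
For reference, a self-contained sketch of the configuration flow introduced above: each entry of `features` overrides the defaults, and `add_scale_ori` widens the positional encoding to carry SIFT's keypoint scale and orientation. This mirrors the diff; it is not the library API itself.

```python
from types import SimpleNamespace

# Defaults and per-feature overrides, as in the diff above (sketch only).
default_conf = {"input_dim": 256, "descriptor_dim": 256, "add_scale_ori": False}
features = {
    "sift": {"weights": "sift_lightglue", "input_dim": 128, "add_scale_ori": True},
}

conf = SimpleNamespace(**default_conf)
for k, v in features["sift"].items():
    setattr(conf, k, v)  # feature-specific entries win

# SIFT keypoints are encoded as (x, y, scale, ori), so the positional
# encoding input grows from 2 to 2 + 2 * add_scale_ori channels.
print(2 + 2 * conf.add_scale_ori)  # -> 4
```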

216 changes: 216 additions & 0 deletions lightglue/sift.py
@@ -0,0 +1,216 @@
import warnings

import cv2
import numpy as np
import torch
from kornia.color import rgb_to_grayscale
from packaging import version

try:
import pycolmap
except ImportError:
pycolmap = None

from .utils import Extractor


def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
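"""Deduplicate DoG keypoints that share a pixel (keep the highest scale or score, then the smallest absolute angle) and optionally apply NMS; returns the indices of keypoints to keep."""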
h, w = image_shape
ij = np.round(points - 0.5).astype(int).T[::-1]

# Remove duplicate points (identical coordinates).
# Pick highest scale or score
s = scales if scores is None else scores
buffer = np.zeros((h, w))
np.maximum.at(buffer, tuple(ij), s)
keep = np.where(buffer[tuple(ij)] == s)[0]

# Pick lowest angle (arbitrary).
ij = ij[:, keep]
buffer[:] = np.inf
o_abs = np.abs(angles[keep])
np.minimum.at(buffer, tuple(ij), o_abs)
mask = buffer[tuple(ij)] == o_abs
ij = ij[:, mask]
keep = keep[mask]

if nms_radius > 0:
# Apply NMS on the remaining points
buffer[:] = 0
buffer[tuple(ij)] = s[keep] # scores or scale

local_max = torch.nn.functional.max_pool2d(
torch.from_numpy(buffer).unsqueeze(0),
kernel_size=nms_radius * 2 + 1,
stride=1,
padding=nms_radius,
).squeeze(0)
is_local_max = buffer == local_max.numpy()
keep = keep[is_local_max[tuple(ij)]]
return keep


def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
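"""Convert SIFT to RootSIFT: L1-normalize, take the element-wise square root, then L2-normalize."""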
x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
x.clip_(min=eps).sqrt_()
return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)


def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> tuple:
"""
Detect keypoints and compute descriptors with an OpenCV detector.
Args:
features: OpenCV-based keypoint detector and descriptor
image: grayscale image of uint8 data type
Returns:
points: (N, 2) array of keypoint coordinates
scores: (N,) array of detector responses
scales: (N,) array of keypoint sizes
angles: (N,) array of keypoint orientations, in radians
descriptors: (N, 128) array of local descriptors
"""
detections, descriptors = features.detectAndCompute(image, None)
points = np.array([k.pt for k in detections], dtype=np.float32)
scores = np.array([k.response for k in detections], dtype=np.float32)
scales = np.array([k.size for k in detections], dtype=np.float32)
angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
return points, scores, scales, angles, descriptors


class SIFT(Extractor):
default_conf = {
"rootsift": True,
"nms_radius": 0, # None to disable filtering entirely.
"max_num_keypoints": 4096,
"backend": "opencv", # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
"detection_threshold": 0.0066667, # from COLMAP
"edge_threshold": 10,
"first_octave": -1, # only used by pycolmap, the default of COLMAP
"num_octaves": 4,
}

preprocess_conf = {
"resize": 1024,
}

required_data_keys = ["image"]

def __init__(self, **conf):
super().__init__(**conf) # Update with default configuration.
backend = self.conf.backend
if backend.startswith("pycolmap"):
if pycolmap is None:
raise ImportError(
"Cannot find module pycolmap: install it with pip"
"or use backend=opencv."
)
options = {
"peak_threshold": self.conf.detection_threshold,
"edge_threshold": self.conf.edge_threshold,
"first_octave": self.conf.first_octave,
"num_octaves": self.conf.num_octaves,
"normalization": pycolmap.Normalization.L2, # L1_ROOT is buggy.
}
device = (
"auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
)
if (
backend == "pycolmap_cpu" or not pycolmap.has_cuda
) and version.parse(pycolmap.__version__) < version.parse("0.5.0"):
warnings.warn(
"The pycolmap CPU SIFT is buggy in version < 0.5.0; "
"consider upgrading pycolmap or using the CUDA version.",
stacklevel=1,
)
else:
options["max_num_features"] = self.conf.max_num_keypoints
self.sift = pycolmap.Sift(options=options, device=device)
elif backend == "opencv":
self.sift = cv2.SIFT_create(
contrastThreshold=self.conf.detection_threshold,
nfeatures=self.conf.max_num_keypoints,
edgeThreshold=self.conf.edge_threshold,
nOctaveLayers=self.conf.num_octaves,
)
else:
backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
raise ValueError(
f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}."
)

def extract_single_image(self, image: torch.Tensor):
image_np = image.cpu().numpy().squeeze(0)

if self.conf.backend.startswith("pycolmap"):
if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
detections, descriptors = self.sift.extract(image_np)
scores = None # Scores are not exposed by COLMAP anymore.
else:
detections, scores, descriptors = self.sift.extract(image_np)
keypoints = detections[:, :2] # Keep only (x, y).
scales, angles = detections[:, -2:].T
if scores is not None and (
self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
):
# Set the scores as a combination of abs. response and scale.
scores = np.abs(scores) * scales
elif self.conf.backend == "opencv":
# TODO: Check if opencv keypoints are already in corner convention
keypoints, scores, scales, angles, descriptors = run_opencv_sift(
self.sift, (image_np * 255.0).astype(np.uint8)
)
pred = {
"keypoints": keypoints,
"scales": scales,
"oris": angles,
"descriptors": descriptors,
}
if scores is not None:
pred["keypoint_scores"] = scores

# Sometimes pycolmap returns points outside the image; remove them.
if self.conf.backend.startswith("pycolmap"):
is_inside = (
pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
).all(-1)
pred = {k: v[is_inside] for k, v in pred.items()}

if self.conf.nms_radius is not None:
keep = filter_dog_point(
pred["keypoints"],
pred["scales"],
pred["oris"],
image_np.shape,
self.conf.nms_radius,
scores=pred.get("keypoint_scores"),
)
pred = {k: v[keep] for k, v in pred.items()}

pred = {k: torch.from_numpy(v) for k, v in pred.items()}
if scores is not None:
# Keep the k keypoints with highest score
num_points = self.conf.max_num_keypoints
if num_points is not None and len(pred["keypoints"]) > num_points:
indices = torch.topk(pred["keypoint_scores"], num_points).indices
pred = {k: v[indices] for k, v in pred.items()}

return pred

def forward(self, data: dict) -> dict:
image = data["image"]
if image.shape[1] == 3:
image = rgb_to_grayscale(image)
device = image.device
image = image.cpu()
pred = []
for k in range(len(image)):
img = image[k]
if "image_size" in data.keys():
# avoid extracting points in padded areas
w, h = data["image_size"][k]
img = img[:, :h, :w]
p = self.extract_single_image(img)
pred.append(p)
pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
if self.conf.rootsift:
pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
return pred
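
A minimal usage sketch for the new extractor (assuming the default OpenCV backend on CPU; the pycolmap backends expose the same interface):

```python
import torch

from lightglue import SIFT

extractor = SIFT(backend="opencv", max_num_keypoints=2048).eval()
image = torch.rand(3, 480, 640)  # RGB in [0, 1]; converted to grayscale internally
feats = extractor.extract(image)  # resizes the longer side to 1024 px by default
# feats contains "keypoints", "scales", "oris", "keypoint_scores", and
# "descriptors" (RootSIFT-normalized, since rootsift=True by default).
```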
46 changes: 15 additions & 31 deletions lightglue/superpoint.py
@@ -43,9 +43,10 @@
# Adapted by Remi Pautrat, Philipp Lindenberger

import torch
from kornia.color import rgb_to_grayscale
from torch import nn

from .utils import ImagePreprocessor
from .utils import Extractor


def simple_nms(scores, nms_radius: int):
@@ -94,7 +95,7 @@ def sample_descriptors(keypoints, descriptors, s: int = 8):
return descriptors


class SuperPoint(nn.Module):
class SuperPoint(Extractor):
"""SuperPoint Convolutional Detector and Descriptor
SuperPoint: Self-Supervised Interest Point Detection and
@@ -112,17 +113,13 @@ class SuperPoint(nn.Module):
}

preprocess_conf = {
**ImagePreprocessor.default_conf,
"resize": 1024,
"grayscale": True,
}

required_data_keys = ["image"]

def __init__(self, **conf):
super().__init__()
self.conf = {**self.default_conf, **conf}

super().__init__(**conf) # Update with default configuration.
self.relu = nn.ReLU(inplace=True)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
@@ -141,24 +138,23 @@ def __init__(self, **conf):

self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
self.convDb = nn.Conv2d(
c5, self.conf["descriptor_dim"], kernel_size=1, stride=1, padding=0
c5, self.conf.descriptor_dim, kernel_size=1, stride=1, padding=0
)

url = "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth" # noqa
self.load_state_dict(torch.hub.load_state_dict_from_url(url))

mk = self.conf["max_num_keypoints"]
if mk is not None and mk <= 0:
if self.conf.max_num_keypoints is not None and self.conf.max_num_keypoints <= 0:
raise ValueError("max_num_keypoints must be positive or None")

def forward(self, data: dict) -> dict:
"""Compute keypoints, scores, descriptors for image"""
for key in self.required_data_keys:
assert key in data, f"Missing key {key} in data"
image = data["image"]
if image.shape[1] == 3: # RGB
scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1)
image = (image * scale).sum(1, keepdim=True)
if image.shape[1] == 3:
image = rgb_to_grayscale(image)

# Shared Encoder
x = self.relu(self.conv1a(image))
x = self.relu(self.conv1b(x))
@@ -179,18 +175,18 @@ def forward(self, data: dict) -> dict:
b, _, h, w = scores.shape
scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h * 8, w * 8)
scores = simple_nms(scores, self.conf["nms_radius"])
scores = simple_nms(scores, self.conf.nms_radius)

# Discard keypoints near the image borders
if self.conf["remove_borders"]:
pad = self.conf["remove_borders"]
if self.conf.remove_borders:
pad = self.conf.remove_borders
scores[:, :pad] = -1
scores[:, :, :pad] = -1
scores[:, -pad:] = -1
scores[:, :, -pad:] = -1

# Extract keypoints
best_kp = torch.where(scores > self.conf["detection_threshold"])
best_kp = torch.where(scores > self.conf.detection_threshold)
scores = scores[best_kp]

# Separate into batches
@@ -200,11 +196,11 @@ def forward(self, data: dict) -> dict:
scores = [scores[best_kp[0] == i] for i in range(b)]

# Keep the k keypoints with highest score
if self.conf["max_num_keypoints"] is not None:
if self.conf.max_num_keypoints is not None:
keypoints, scores = list(
zip(
*[
top_k_keypoints(k, s, self.conf["max_num_keypoints"])
top_k_keypoints(k, s, self.conf.max_num_keypoints)
for k, s in zip(keypoints, scores)
]
)
@@ -229,15 +225,3 @@ def forward(self, data: dict) -> dict:
"keypoint_scores": torch.stack(scores, 0),
"descriptors": torch.stack(descriptors, 0).transpose(-1, -2).contiguous(),
}

def extract(self, img: torch.Tensor, **conf) -> dict:
"""Perform extraction with online resizing"""
if img.dim() == 3:
img = img[None] # add batch dim
assert img.dim() == 4 and img.shape[0] == 1
shape = img.shape[-2:][::-1]
img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
feats = self.forward({"image": img})
feats["image_size"] = torch.tensor(shape)[None].to(img).float()
feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
return feats
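
The kornia call is numerically equivalent to the removed manual conversion: kornia's default grayscale weights are the same ITU-R BT.601 coefficients. A quick check, assuming kornia's defaults as used here:

```python
import torch
from kornia.color import rgb_to_grayscale

image = torch.rand(1, 3, 32, 32)
scale = image.new_tensor([0.299, 0.587, 0.114]).view(1, 3, 1, 1)
manual = (image * scale).sum(1, keepdim=True)  # the removed code path
assert torch.allclose(rgb_to_grayscale(image), manual, atol=1e-6)
```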
24 changes: 19 additions & 5 deletions lightglue/utils.py
@@ -16,7 +16,6 @@ class ImagePreprocessor:
"interpolation": "bilinear",
"align_corners": None,
"antialias": True,
"grayscale": False, # convert rgb to grayscale
}

def __init__(self, **conf) -> None:
@@ -36,10 +35,6 @@ def __call__(self, img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
align_corners=self.conf.align_corners,
)
scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
if self.conf.grayscale and img.shape[-3] == 3:
img = kornia.color.rgb_to_grayscale(img)
elif not self.conf.grayscale and img.shape[-3] == 1:
img = kornia.color.grayscale_to_rgb(img)
return img, scale


@@ -133,6 +128,25 @@ def load_image(path: Path, resize: int = None, **kwargs) -> torch.Tensor:
return numpy_image_to_torch(image)


class Extractor(torch.nn.Module):
def __init__(self, **conf):
super().__init__()
self.conf = SimpleNamespace(**{**self.default_conf, **conf})

@torch.no_grad()
def extract(self, img: torch.Tensor, **conf) -> dict:
"""Perform extraction with online resizing"""
if img.dim() == 3:
img = img[None] # add batch dim
assert img.dim() == 4 and img.shape[0] == 1
shape = img.shape[-2:][::-1]
img, scales = ImagePreprocessor(**{**self.preprocess_conf, **conf})(img)
feats = self.forward({"image": img})
feats["image_size"] = torch.tensor(shape)[None].to(img).float()
feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
return feats


def match_pair(
extractor,
matcher,
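
The new `Extractor` base class centralizes the config merge and the resize-aware `extract()` that DISK and SuperPoint previously duplicated. A hypothetical toy subclass to illustrate the contract (all names below are illustrative, not part of the library):

```python
import torch

from lightglue.utils import Extractor


class RandomPoints(Extractor):
    """Hypothetical toy extractor, for illustration only."""

    default_conf = {"max_num_keypoints": 512}
    preprocess_conf = {"resize": 1024}
    required_data_keys = ["image"]

    def forward(self, data: dict) -> dict:
        b, _, h, w = data["image"].shape
        n = self.conf.max_num_keypoints  # merged with default_conf by Extractor
        return {
            "keypoints": torch.rand(b, n, 2) * torch.tensor([w - 1.0, h - 1.0]),
            "keypoint_scores": torch.rand(b, n),
            "descriptors": torch.rand(b, n, 128),
        }


# extract() adds the batch dim, resizes via ImagePreprocessor, and maps the
# keypoints back to the resolution of the original input image.
feats = RandomPoints().extract(torch.rand(3, 480, 640))
```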
