Commit: Add SIFT and ALIKED extractors + weights (#75)

Release of the SIFT+LightGlue and ALIKED+LightGlue weights (#5, #51).

Co-authored-by: Paul-Edouard Sarlin <[email protected]>
Showing 8 changed files with 1,057 additions and 70 deletions.
lightglue/__init__.py
@@ -1,4 +1,6 @@
+from .aliked import ALIKED  # noqa
 from .disk import DISK  # noqa
 from .lightglue import LightGlue  # noqa
+from .sift import SIFT  # noqa
 from .superpoint import SuperPoint  # noqa
 from .utils import match_pair  # noqa
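With these exports, both new extractors can be instantiated from the package root like the existing ones. A minimal sketch (the keypoint budgets are illustrative, not defaults mandated by the commit):

    from lightglue import ALIKED, SIFT

    sift = SIFT(max_num_keypoints=4096).eval()
    aliked = ALIKED(max_num_keypoints=2048).eval()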
lightglue/sift.py
@@ -0,0 +1,216 @@
import warnings

import cv2
import numpy as np
import torch
from kornia.color import rgb_to_grayscale
from packaging import version

try:
    import pycolmap
except ImportError:
    pycolmap = None

from .utils import Extractor

def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
    h, w = image_shape
    ij = np.round(points - 0.5).astype(int).T[::-1]

    # Remove duplicate points (identical coordinates).
    # Pick the highest scale or score.
    s = scales if scores is None else scores
    buffer = np.zeros((h, w))
    np.maximum.at(buffer, tuple(ij), s)
    keep = np.where(buffer[tuple(ij)] == s)[0]

    # Pick the lowest angle (arbitrary).
    ij = ij[:, keep]
    buffer[:] = np.inf
    o_abs = np.abs(angles[keep])
    np.minimum.at(buffer, tuple(ij), o_abs)
    mask = buffer[tuple(ij)] == o_abs
    ij = ij[:, mask]
    keep = keep[mask]

    if nms_radius > 0:
        # Apply NMS on the remaining points.
        buffer[:] = 0
        buffer[tuple(ij)] = s[keep]  # scores or scale

        local_max = torch.nn.functional.max_pool2d(
            torch.from_numpy(buffer).unsqueeze(0),
            kernel_size=nms_radius * 2 + 1,
            stride=1,
            padding=nms_radius,
        ).squeeze(0)
        is_local_max = buffer == local_max.numpy()
        keep = keep[is_local_max[tuple(ij)]]
    return keep
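# Example (illustrative sketch, not part of this diff): when two detections
# land on the same pixel, only the one with the larger scale survives the
# duplicate filter above:
#   pts = np.array([[10.5, 20.5], [10.5, 20.5], [40.0, 40.0]], np.float32)
#   scales = np.array([1.0, 2.0, 1.5], np.float32)
#   angles = np.array([0.3, -0.1, 0.0], np.float32)
#   filter_dog_point(pts, scales, angles, (64, 64), nms_radius=0)
#   # -> array([1, 2]): the lower-scale duplicate at index 0 is dropped.
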
def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
    x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
    x.clip_(min=eps).sqrt_()
    return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
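# Example (illustrative sketch, not part of this diff): RootSIFT applies the
# Hellinger-kernel mapping — L1-normalize, take the square root, then
# L2-normalize — so the output descriptors always have unit L2 norm:
#   desc = torch.rand(1, 10, 128)
#   root = sift_to_rootsift(desc)
#   assert torch.allclose(root.norm(p=2, dim=-1), torch.ones(1, 10))
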
def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray):
    """
    Detect keypoints using an OpenCV detector and, optionally, describe them.
    Args:
        features: OpenCV-based keypoint detector and descriptor
        image: Grayscale image of uint8 data type
    Returns:
        points: (N, 2) array of keypoint coordinates
        scores: 1D array of detector responses
        scales: 1D array of keypoint sizes
        angles: 1D array of keypoint orientations, in radians
        descriptors: (N, D) array of descriptors
    """
    detections, descriptors = features.detectAndCompute(image, None)
    points = np.array([k.pt for k in detections], dtype=np.float32)
    scores = np.array([k.response for k in detections], dtype=np.float32)
    scales = np.array([k.size for k in detections], dtype=np.float32)
    angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
    return points, scores, scales, angles, descriptors
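# Example (illustrative sketch, not part of this diff): calling the helper
# directly, assuming gray_u8 is a uint8 grayscale array:
#   sift = cv2.SIFT_create(nfeatures=4096)
#   pts, scores, scales, angles, desc = run_opencv_sift(sift, gray_u8)
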
class SIFT(Extractor):
    default_conf = {
        "rootsift": True,
        "nms_radius": 0,  # None to disable filtering entirely.
        "max_num_keypoints": 4096,
        "backend": "opencv",  # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
        "detection_threshold": 0.0066667,  # from COLMAP
        "edge_threshold": 10,
        "first_octave": -1,  # only used by pycolmap, the default of COLMAP
        "num_octaves": 4,
    }

    preprocess_conf = {
        "resize": 1024,
    }

    required_data_keys = ["image"]
    def __init__(self, **conf):
        super().__init__(**conf)  # Update with default configuration.
        backend = self.conf.backend
        if backend.startswith("pycolmap"):
            if pycolmap is None:
                raise ImportError(
                    "Cannot find module pycolmap: install it with pip "
                    "or use backend=opencv."
                )
            options = {
                "peak_threshold": self.conf.detection_threshold,
                "edge_threshold": self.conf.edge_threshold,
                "first_octave": self.conf.first_octave,
                "num_octaves": self.conf.num_octaves,
                "normalization": pycolmap.Normalization.L2,  # L1_ROOT is buggy.
            }
            device = (
                "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
            )
            if (
                backend == "pycolmap_cpu" or not pycolmap.has_cuda
            ) and version.parse(pycolmap.__version__) < version.parse("0.5.0"):
                warnings.warn(
                    "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
                    "consider upgrading pycolmap or use the CUDA version.",
                    stacklevel=1,
                )
            else:
                options["max_num_features"] = self.conf.max_num_keypoints
            self.sift = pycolmap.Sift(options=options, device=device)
        elif backend == "opencv":
            self.sift = cv2.SIFT_create(
                contrastThreshold=self.conf.detection_threshold,
                nfeatures=self.conf.max_num_keypoints,
                edgeThreshold=self.conf.edge_threshold,
                nOctaveLayers=self.conf.num_octaves,
            )
        else:
            backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
            raise ValueError(
                f"Unknown backend: {backend} not in {{{','.join(backends)}}}."
            )
    def extract_single_image(self, image: torch.Tensor):
        image_np = image.cpu().numpy().squeeze(0)

        if self.conf.backend.startswith("pycolmap"):
            if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
                detections, descriptors = self.sift.extract(image_np)
                scores = None  # Scores are not exposed by COLMAP anymore.
            else:
                detections, scores, descriptors = self.sift.extract(image_np)
            keypoints = detections[:, :2]  # Keep only (x, y).
            scales, angles = detections[:, -2:].T
            if scores is not None and (
                self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
            ):
                # Set the scores as a combination of abs. response and scale.
                scores = np.abs(scores) * scales
        elif self.conf.backend == "opencv":
            # TODO: Check if opencv keypoints are already in corner convention
            keypoints, scores, scales, angles, descriptors = run_opencv_sift(
                self.sift, (image_np * 255.0).astype(np.uint8)
            )
        pred = {
            "keypoints": keypoints,
            "scales": scales,
            "oris": angles,
            "descriptors": descriptors,
        }
        if scores is not None:
            pred["keypoint_scores"] = scores

        # Sometimes pycolmap returns points outside the image; remove them.
        if self.conf.backend.startswith("pycolmap"):
            is_inside = (
                pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
            ).all(-1)
            pred = {k: v[is_inside] for k, v in pred.items()}

        if self.conf.nms_radius is not None:
            keep = filter_dog_point(
                pred["keypoints"],
                pred["scales"],
                pred["oris"],
                image_np.shape,
                self.conf.nms_radius,
                scores=pred.get("keypoint_scores"),
            )
            pred = {k: v[keep] for k, v in pred.items()}

        pred = {k: torch.from_numpy(v) for k, v in pred.items()}
        if scores is not None:
            # Keep the k keypoints with the highest scores.
            num_points = self.conf.max_num_keypoints
            if num_points is not None and len(pred["keypoints"]) > num_points:
                indices = torch.topk(pred["keypoint_scores"], num_points).indices
                pred = {k: v[indices] for k, v in pred.items()}

        return pred
    def forward(self, data: dict) -> dict:
        image = data["image"]
        if image.shape[1] == 3:
            image = rgb_to_grayscale(image)
        device = image.device
        image = image.cpu()
        pred = []
        for k in range(len(image)):
            img = image[k]
            if "image_size" in data.keys():
                # Avoid extracting points in padded areas.
                w, h = data["image_size"][k]
                img = img[:, :h, :w]
            p = self.extract_single_image(img)
            pred.append(p)
        pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
        if self.conf.rootsift:
            pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
        return pred
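Putting it together, the new extractor plugs into the usual LightGlue pipeline. A minimal sketch following the repository's README pattern (the image paths and keypoint budget are illustrative placeholders):

    from lightglue import LightGlue, SIFT
    from lightglue.utils import load_image, rbd

    extractor = SIFT(max_num_keypoints=4096).eval()
    matcher = LightGlue(features="sift").eval()

    feats0 = extractor.extract(load_image("image0.jpg"))
    feats1 = extractor.extract(load_image("image1.jpg"))
    matches01 = matcher({"image0": feats0, "image1": feats1})
    feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]]  # remove batch dim
    kpts0, kpts1 = feats0["keypoints"], feats1["keypoints"]
    matches = matches01["matches"]  # (M, 2) indices into kpts0 and kpts1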