Skip to content

Commit

Permalink
Improve docs for rainbow visualization
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 580880342
Change-Id: I6feecd8717d783ffb8cb31d7f969e9c3af96cb0e
  • Loading branch information
cdoersch committed Nov 9, 2023
1 parent 6102867 commit 0d9acd4
Showing 1 changed file with 33 additions and 14 deletions.
47 changes: 33 additions & 14 deletions colabs/tapir_rainbow_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@
"\u003c/p\u003e\n",
"\n",
"\u003cp align=\"center\"\u003e\n",
" \u003cimg src=\"https://storage.googleapis.com/dm-tapnet/horsejump_rainbow.gif\" width=\"70%\"/\u003e\n",
" \u003cimg src=\"https://storage.googleapis.com/dm-tapnet/horsejump_rainbow.gif\" width=\"70%\"/\u003e\u003cbr/\u003e\u003cbr/\u003e\n",
"\u003c/p\u003e\n",
"\u003cp\u003e\n",
" This visualization uses TAPIR to show how an object moves through space, even if the camera is tracking the object. It begins by tracking points densely on a grid. Then it estimates the camera motion as a homography (i.e., assuming either a planar background or a camera that rotates but does not move). Any points that move according to that homography are removed. Then we generate a \u0026ldquo;rainbow\u0026rdquo; visualization, where the tracked points leave \u0026ldquo;tails\u0026rdquo; that follow the camera motion, so it looks like the earlier positions of points are frozen in space. This visualization was inspired by a similar one from \u003ca href=\"https://omnimotion.github.io/\"\u003eOmniMotion\u003c/a\u003e, although that one assumes ground-truth segmentations are available and models the camera motion as 2D translation only.\n",
"\u003c/p\u003e\n",
"\u003cp\u003e\n",
" Note that we consider this algorithm \u0026ldquo;semi-automatic\u0026rdquo; because it may need some tuning to give pleasing results on arbitrary videos. Tracking failures on the background may show up as foreground objects. Results are sensitive to the outlier thresholds used in RANSAC and segmentation, and you may wish to discard short tracks. You can sample in a different way (e.g., sampling points from multiple frames) and everything will work, but the \u003cfont face=\"Courier\"\u003eplot_tracks_tails\u003c/font\u003e function uses the input order of the points to choose colors, so you will have to sort the points appropriately.\n",
"\u003c/p\u003e\n"
]
},
Expand Down Expand Up @@ -197,15 +203,6 @@
" return points"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "b7X5ZNCpuemg"
},
"source": [
"## Inference on DAVIS"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -239,10 +236,11 @@
"resize_height = 512 # @param {type: \"integer\"}\n",
"resize_width = 512 # @param {type: \"integer\"}\n",
"stride = 16 # @param {type: \"integer\"}\n",
"query_frame = 0 # @param {type: \"integer\"}\n",
"\n",
"height, width = orig_frames.shape[1:3]\n",
"frames = media.resize_video(orig_frames, (resize_height, resize_width))\n",
"query_points = sample_grid_points(0, resize_height, resize_width, stride)\n",
"query_points = sample_grid_points(query_frame, resize_height, resize_width, stride)\n",
"batch_size = 64\n",
"tracks = []\n",
"visibles = []\n",
Expand Down Expand Up @@ -275,23 +273,44 @@
},
"outputs": [],
"source": [
"# The inlier point threshold for ransac, specified in normalized coordinates\n",
"# (points are rescaled to the range [0, 1] for optimization).\n",
"ransac_inlier_threshold = 0.07 # @param {type: \"number\"}\n",
"# What fraction of points need to be inliers for RANSAC to consider a trajectory\n",
"# to be trustworthy for estimating the homography.\n",
"ransac_track_inlier_frac = 0.95 # @param {type: \"number\"}\n",
"# After initial RANSAC, how many refinement passes to adjust the homographies\n",
"# based on tracks that have been deemed trustworthy.\n",
"num_refinement_passes = 2 # @param {type: \"number\"}\n",
"# After homographies are estimated, consider points to be outliers if their\n",
"# error is at least this threshold (err is compared to the threshold squared).\n",
"foreground_inlier_threshold = 0.07 # @param {type: \"number\"}\n",
"# After homographies are estimated, consider a track to be part of the\n",
"# foreground if at most this fraction of its visible points are inliers.\n",
"foreground_frac = 0.6 # @param {type: \"number\"}\n",
"\n",
"\n",
"occluded = 1.0 - visibles\n",
"homogs, err, canonical = viz_utils.get_homographies_wrt_frame(\n",
" tracks,\n",
" occluded,\n",
" [width, height],\n",
" thresh=ransac_inlier_threshold,\n",
" outlier_point_threshold=ransac_track_inlier_frac,\n",
" num_refinement_passes=num_refinement_passes,\n",
")\n",
"\n",
"inlier_ct = np.sum((err \u003c np.square(0.07)) * visibles, axis=-1)\n",
"inliers = (err \u003c np.square(foreground_inlier_threshold)) * visibles\n",
"inlier_ct = np.sum(inliers, axis=-1)\n",
"ratio = inlier_ct / np.maximum(1.0, np.sum(visibles, axis=1))\n",
"is_fg = ratio \u003c= 0.60\n",
"is_fg = ratio \u003c= foreground_frac\n",
"video = viz_utils.plot_tracks_tails(\n",
" orig_frames,\n",
" tracks[is_fg],\n",
" occluded[is_fg],\n",
" homogs\n",
")\n",
"media.show_video(video, fps=16)"
"media.show_video(video, fps=24)"
]
}
],
Expand Down

0 comments on commit 0d9acd4

Please sign in to comment.