From 4af689ebff0742e5471e54e364cd7e234f0f37ca Mon Sep 17 00:00:00 2001
From: Bijan Haney
Date: Mon, 25 Nov 2024 13:41:25 -0500
Subject: [PATCH] add relative crop to transform camera and overlay crop box (#4582)

---
 components/camera/transformpipeline/mods.go   | 90 ++++++++++++++++---
 .../camera/transformpipeline/mods_test.go     | 71 +++++++++++++++
 2 files changed, 151 insertions(+), 10 deletions(-)

diff --git a/components/camera/transformpipeline/mods.go b/components/camera/transformpipeline/mods.go
index 776d51856fa..e9412d0ad85 100644
--- a/components/camera/transformpipeline/mods.go
+++ b/components/camera/transformpipeline/mods.go
@@ -2,6 +2,7 @@ package transformpipeline
 
 import (
 	"context"
+	"fmt"
 	"image"
 	"image/color"
 
@@ -16,6 +17,7 @@ import (
 	"go.viam.com/rdk/rimage"
 	"go.viam.com/rdk/rimage/transform"
 	"go.viam.com/rdk/utils"
+	"go.viam.com/rdk/vision/objectdetection"
 )
 
 // rotateConfig are the attributes for a rotate transform.
@@ -158,16 +160,20 @@ func (rs *resizeSource) Close(ctx context.Context) error {
 
 // cropConfig are the attributes for a crop transform.
 type cropConfig struct {
-	XMin int `json:"x_min_px"`
-	YMin int `json:"y_min_px"`
-	XMax int `json:"x_max_px"`
-	YMax int `json:"y_max_px"`
+	XMin        float64 `json:"x_min_px"`         // min x bound: pixels, or a fraction of the width in relative mode
+	YMin        float64 `json:"y_min_px"`         // min y bound: pixels, or a fraction of the height in relative mode
+	XMax        float64 `json:"x_max_px"`         // max x bound; either max above 1 selects absolute pixel mode
+	YMax        float64 `json:"y_max_px"`         // max y bound; either max above 1 selects absolute pixel mode
+	ShowCropBox bool    `json:"overlay_crop_box"` // color streams: overlay the crop window instead of cropping
 }
 
 type cropSource struct {
 	originalStream gostream.VideoStream
 	imgType        camera.ImageType
 	cropWindow     image.Rectangle
+	cropRel        []float64       // relative bounds {xMin, yMin, xMax, yMax}; empty when the crop is absolute
+	showCropBox    bool            // copied from cropConfig.ShowCropBox by newCropTransform
+	imgBounds      image.Rectangle // frame bounds recorded by Read to detect an image size change
 }
 
 // newCropTransform creates a new crop transform.
@@ -187,9 +193,32 @@ func newCropTransform(
 	if conf.YMin >= conf.YMax {
 		return nil, camera.UnspecifiedStream, errors.New("cannot crop image to 0 height (y_min is >= y_max)")
 	}
-	cropRect := image.Rect(conf.XMin, conf.YMin, conf.XMax, conf.YMax)
+	cropRect := image.Rectangle{}
+	cropRel := []float64{}
+	switch {
+	case conf.XMax == 1.0 && conf.YMax == 1.0 && conf.XMin == 0.0 && conf.YMin == 0.0:
+		// interpreting this to mean cropping to the upper left pixel
+		// you wouldn't use crop if you weren't gonna crop your image
+		cropRect = image.Rect(0, 0, 1, 1)
+	case conf.XMax > 1.0 || conf.YMax > 1.0:
+		// you're not using relative boundaries if either max value is greater than 1
+		cropRect = image.Rect(int(conf.XMin), int(conf.YMin), int(conf.XMax), int(conf.YMax))
+	default:
+		// everything else assumes relative boundaries
+		if conf.XMin > 1.0 || conf.YMin > 1.0 { // but rel values cannot be greater than 1.0
+			return nil, camera.UnspecifiedStream,
+				errors.New("if using relative bounds between 0 and 1 for cropping, all crop attributes must be between 0 and 1")
+		}
+		cropRel = []float64{conf.XMin, conf.YMin, conf.XMax, conf.YMax}
+	}
 
-	reader := &cropSource{gostream.NewEmbeddedVideoStream(source), stream, cropRect}
+	reader := &cropSource{
+		originalStream: gostream.NewEmbeddedVideoStream(source),
+		imgType:        stream,
+		cropWindow:     cropRect,
+		cropRel:        cropRel,
+		showCropBox:    conf.ShowCropBox,
+	}
 	src, err := camera.NewVideoSourceFromReader(ctx, reader, nil, stream)
 	if err != nil {
 		return nil, camera.UnspecifiedStream, err
@@ -197,6 +226,24 @@ func newCropTransform(
 	return src, stream, err
 }
 
+// relToAbsCrop maps the relative bounds in cs.cropRel onto img's pixel grid. Each edge is
+// rounded to the nearest pixel rather than truncated, so float error cannot drop a pixel:
+// 0.29*100 is 28.999999999999996 as a float64 and has to come out as 29, not 28.
+func (cs *cropSource) relToAbsCrop(img image.Image) image.Rectangle {
+	b := img.Bounds()
+	// round half away from zero by hand so the patch needs no extra import
+	toPx := func(origin, size int, rel float64) int {
+		scaled := rel * float64(size)
+		if scaled < 0 {
+			return origin + int(scaled-0.5)
+		}
+		return origin + int(scaled+0.5)
+	}
+	x1, y1 := toPx(b.Min.X, b.Dx(), cs.cropRel[0]), toPx(b.Min.Y, b.Dy(), cs.cropRel[1])
+	x2, y2 := toPx(b.Min.X, b.Dx(), cs.cropRel[2]), toPx(b.Min.Y, b.Dy(), cs.cropRel[3])
+	return image.Rect(x1, y1, x2, y2)
+}
+
 // Read crops the 2D image depending on the crop window.
 func (cs *cropSource) Read(ctx context.Context) (image.Image, func(), error) {
 	ctx, span := trace.StartSpan(ctx, "camera::transformpipeline::crop::Read")
@@ -205,14 +252,37 @@ func (cs *cropSource) Read(ctx context.Context) (image.Image, func(), error) {
 	if err != nil {
 		return nil, nil, err
 	}
+	// a cached relative crop window goes stale when the incoming image size changes
+	if cs.imgBounds != orig.Bounds() && len(cs.cropRel) != 0 {
+		cs.cropWindow = image.Rectangle{} // reset the crop box
+	}
+	// Track every frame's bounds, not only the first: otherwise a stream that changes
+	// size and later changes back would keep the window computed for the other size.
+	cs.imgBounds = orig.Bounds()
+	if cs.cropWindow.Empty() && len(cs.cropRel) != 0 {
+		cs.cropWindow = cs.relToAbsCrop(orig)
+	}
 	switch cs.imgType {
 	case camera.ColorStream, camera.UnspecifiedStream:
-		newImg := imaging.Crop(orig, cs.cropWindow)
-		if newImg.Bounds().Empty() {
-			return nil, nil, errors.New("crop transform cropped image to 0 pixels")
+		if cs.showCropBox {
+			newDet := objectdetection.NewDetection(cs.cropWindow, 1.0, "crop")
+			dets := []objectdetection.Detection{newDet}
+			newImg, err := objectdetection.Overlay(orig, dets)
+			if err != nil {
+				return nil, nil, fmt.Errorf("could not overlay crop box: %w", err)
+			}
+			return newImg, release, nil
+		} else {
+			newImg := imaging.Crop(orig, cs.cropWindow)
+			if newImg.Bounds().Empty() {
+				return nil, nil, errors.New("crop transform cropped image to 0 pixels")
+			}
+			return newImg, release, nil
 		}
-		return newImg, release, nil
 	case camera.DepthStream:
+		if cs.showCropBox {
+			return nil, nil, errors.New("crop box overlay not supported for depth images")
+		}
 		dm, err := rimage.ConvertImageToDepthMap(ctx, orig)
 		if err != nil {
 			return nil, nil, err
diff --git a/components/camera/transformpipeline/mods_test.go
b/components/camera/transformpipeline/mods_test.go index 721319a9bdf..d5da2fae128 100644 --- a/components/camera/transformpipeline/mods_test.go +++ b/components/camera/transformpipeline/mods_test.go @@ -64,6 +64,7 @@ func TestCrop(t *testing.T) { test.That(t, out.Bounds().Dy(), test.ShouldEqual, 10) test.That(t, out, test.ShouldHaveSameTypeAs, &image.NRGBA{}) test.That(t, rs.Close(context.Background()), test.ShouldBeNil) + // crop has limits bigger than the image dimensions, but just takes the window am = utils.AttributeMap{"x_min_px": 127, "x_max_px": 150, "y_min_px": 71, "y_max_px": 110} rs, stream, err = newCropTransform(context.Background(), source, camera.ColorStream, am) @@ -75,6 +76,76 @@ func TestCrop(t *testing.T) { test.That(t, out.Bounds().Dy(), test.ShouldEqual, 1) test.That(t, rs.Close(context.Background()), test.ShouldBeNil) + // relative crop + dummyImg := image.NewRGBA(image.Rect(0, 0, 100, 100)) + source = gostream.NewVideoSource(&fake.StaticSource{ColorImg: dummyImg}, prop.Video{}) + out, _, err = camera.ReadImage(context.Background(), source) + test.That(t, err, test.ShouldBeNil) + test.That(t, out.Bounds().Dx(), test.ShouldEqual, 100) + test.That(t, out.Bounds().Dy(), test.ShouldEqual, 100) + am = utils.AttributeMap{"x_min_px": 0.2, "x_max_px": 0.4, "y_min_px": 0.3, "y_max_px": 0.99} + rs, stream, err = newCropTransform(context.Background(), source, camera.ColorStream, am) + test.That(t, err, test.ShouldBeNil) + test.That(t, stream, test.ShouldEqual, camera.ColorStream) + out, _, err = camera.ReadImage(context.Background(), rs) + test.That(t, err, test.ShouldBeNil) + test.That(t, out.Bounds().Dx(), test.ShouldEqual, 20) + test.That(t, out.Bounds().Dy(), test.ShouldEqual, 69) + test.That(t, out, test.ShouldHaveSameTypeAs, &image.NRGBA{}) + test.That(t, rs.Close(context.Background()), test.ShouldBeNil) + + // the edge case of cropping to one pixel + am = utils.AttributeMap{ + "x_min_px": 0.0, + "x_max_px": 1.0, + "y_min_px": 0.0, + "y_max_px": 
1.0, + } + rs, stream, err = newCropTransform(context.Background(), source, camera.ColorStream, am) + test.That(t, err, test.ShouldBeNil) + test.That(t, stream, test.ShouldEqual, camera.ColorStream) + out, _, err = camera.ReadImage(context.Background(), rs) + test.That(t, err, test.ShouldBeNil) + test.That(t, out.Bounds().Dx(), test.ShouldEqual, 1) + test.That(t, out.Bounds().Dy(), test.ShouldEqual, 1) + test.That(t, out, test.ShouldHaveSameTypeAs, &image.NRGBA{}) + test.That(t, rs.Close(context.Background()), test.ShouldBeNil) + + // quadrant cropping + am = utils.AttributeMap{ + "x_min_px": 0.5, + "x_max_px": 1.0, + "y_min_px": 0.5, + "y_max_px": 1.0, + } + rs, stream, err = newCropTransform(context.Background(), source, camera.ColorStream, am) + test.That(t, err, test.ShouldBeNil) + test.That(t, stream, test.ShouldEqual, camera.ColorStream) + out, _, err = camera.ReadImage(context.Background(), rs) + test.That(t, err, test.ShouldBeNil) + test.That(t, out.Bounds().Dx(), test.ShouldEqual, 50) + test.That(t, out.Bounds().Dy(), test.ShouldEqual, 50) + test.That(t, out, test.ShouldHaveSameTypeAs, &image.NRGBA{}) + test.That(t, rs.Close(context.Background()), test.ShouldBeNil) + + // relative crop but you just overlay the box + am = utils.AttributeMap{ + "x_min_px": 0.2, + "x_max_px": 0.4, + "y_min_px": 0.3, + "y_max_px": 0.99, + "overlay_crop_box": true, + } + rs, stream, err = newCropTransform(context.Background(), source, camera.ColorStream, am) + test.That(t, err, test.ShouldBeNil) + test.That(t, stream, test.ShouldEqual, camera.ColorStream) + out, _, err = camera.ReadImage(context.Background(), rs) + test.That(t, err, test.ShouldBeNil) + test.That(t, out.Bounds().Dx(), test.ShouldEqual, 100) + test.That(t, out.Bounds().Dy(), test.ShouldEqual, 100) + test.That(t, out, test.ShouldHaveSameTypeAs, &image.NRGBA{}) + test.That(t, rs.Close(context.Background()), test.ShouldBeNil) + // error - crop limits are outside of original image am = utils.AttributeMap{"x_min_px": 
1000, "x_max_px": 2000, "y_min_px": 300, "y_max_px": 400} rs, stream, err = newCropTransform(context.Background(), source, camera.ColorStream, am)