Skip to content

Commit 567f753

Browse files
committed
pinned_image_set: Retry podman manifest inspect aggressively to tolerate transient failures
1 parent cb7cd68 commit 567f753

File tree

1 file changed

+25
-9
lines changed

1 file changed

+25
-9
lines changed

pkg/daemon/pinned_image_set.go

+25-9
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,11 @@ const (
7373
)
7474

7575
var (
76-
errInsufficientStorage = errors.New("storage available is less than minimum required")
77-
errFailedToPullImage = errors.New("failed to pull image")
78-
errNotFound = errors.New("not found")
79-
errRequeueAfterTimeout = errors.New("requeue: prefetching images incomplete after timeout")
76+
errInsufficientStorage = errors.New("storage available is less than minimum required")
77+
errFailedToPullImage = errors.New("failed to pull image")
78+
errFailedToManifestInspect = errors.New("failed to execute podman manifest inspect")
79+
errNotFound = errors.New("not found")
80+
errRequeueAfterTimeout = errors.New("requeue: prefetching images incomplete after timeout")
8081
)
8182

8283
// PinnedImageSetManager manages the prefetching of images.
@@ -1139,12 +1140,27 @@ func (p *PinnedImageSetManager) getImageSize(ctx context.Context, imageName, aut
11391140
imageName,
11401141
}
11411142

1142-
output, err := exec.CommandContext(ctx, "podman", args...).CombinedOutput()
1143-
if err != nil && strings.Contains(err.Error(), "manifest unknown") {
1144-
return 0, errNotFound
1145-
}
1143+
var lastErr error
1144+
tries := 0
1145+
var output []byte
1146+
err := wait.PollUntilContextCancel(ctx, 2*time.Second, true, func(ctx context.Context) (bool, error) {
1147+
tries++
1148+
1149+
output, err := exec.CommandContext(ctx, "podman", args...).CombinedOutput()
1150+
lastErr = err
1151+
if err != nil && strings.Contains(err.Error(), "manifest unknown") {
1152+
return true, errNotFound
1153+
}
1154+
if err != nil {
1155+
klog.Infof("%w %q: Output:%s Error:%w", errFailedToManifestInspect, imageName, output, err)
1156+
return false, nil
1157+
}
1158+
1159+
return true, nil
1160+
})
1161+
// err is non-nil only if ctx was cancelled or timed out, or if the poll function returned errNotFound
11461162
if err != nil {
1147-
return 0, fmt.Errorf("failed to execute podman manifest inspect for %q: Output:%s Error:%w", imageName, output, err)
1163+
return 0, fmt.Errorf("%w %q (%d tries): %w: %w", errFailedToManifestInspect, imageName, tries, err, lastErr)
11481164
}
11491165

11501166
var manifest ocispec.Manifest

0 commit comments

Comments
 (0)