Skip to content

Commit

Permalink
Retry DELTA_APPLY_RETRY_COUNT (3) times during delta apply fail befor…
Browse files Browse the repository at this point in the history
…e reverting to regular pull

This prevents an image download error loop where the delta image on the delta server is present,
but some aspect of the delta image or the base image on the device does not match up, causing
the delta to fail to be applied to the base image.

Upon delta apply errors exceeding DELTA_APPLY_RETRY_COUNT, revert to a regular pull.

Change-type: patch
Signed-off-by: Christina Ying Wang <[email protected]>
  • Loading branch information
cywang117 committed Jan 13, 2025
1 parent 8b74b94 commit 37b88f7
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 11 deletions.
64 changes: 53 additions & 11 deletions src/lib/docker-utils.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
import type { ProgressCallback } from 'docker-progress';
import { DockerProgress } from 'docker-progress';
import type { ProgressCallback } from 'docker-progress';
import Dockerode from 'dockerode';
import _ from 'lodash';
import memoizee from 'memoizee';

import { applyDelta, OutOfSyncError } from 'docker-delta';

import type { SchemaReturn } from '../config/schema-type';
import log from './supervisor-console';
import { envArrayToObject } from './conversions';
import * as request from './request';
import {
DeltaStillProcessingError,
ImageAuthenticationError,
InvalidNetGatewayError,
DeltaServerError,
DeltaApplyError,
isStatusError,
} from './errors';
import * as request from './request';
import type { StatusError } from './errors';
import type { EnvVarObject } from '../types';

import log from './supervisor-console';
import type { SchemaReturn } from '../config/schema-type';

export type FetchOptions = SchemaReturn<'fetchOptions'>;
export type DeltaFetchOptions = FetchOptions & {
Expand All @@ -42,6 +43,18 @@ type ImageNameParts = {
// (10 mins)
const DELTA_TOKEN_TIMEOUT = 10 * 60 * 1000;

// How many times to retry a v3 delta apply before falling back to a regular pull.
// A delta is applied to the base image when pulling, so a failure could be due to
// "layers from manifest don't match image configuration", which can occur before
// or after downloading delta image layers.
//
// Other causes of failure are less well documented than "layers from manifest"
// but may also occur; it is unclear whether they happen before, during, or after
// downloading delta image layers.
//
// See: https://github.com/balena-os/balena-engine/blob/master/distribution/pull_v2.go#L43
const DELTA_APPLY_RETRY_COUNT = 3;

export const docker = new Dockerode();
export const dockerProgress = new DockerProgress({
docker,
Expand Down Expand Up @@ -140,7 +153,7 @@ export async function fetchDeltaWithProgress(
}

// Since the supervisor never calls this function with a source anymore,
// this should never happen, but w ehandle it anyway
// this should never happen, but we handle it anyway
if (deltaOpts.deltaSource == null) {
logFn('Falling back to regular pull due to lack of a delta source');
return fetchImageWithProgress(imgDest, deltaOpts, onProgress);
Expand Down Expand Up @@ -226,29 +239,58 @@ export async function fetchDeltaWithProgress(
`Got an error when parsing delta server response for v3 delta: ${e}`,
);
}
id = await applyBalenaDelta(name, token, onProgress, logFn);
// Try to apply the delta up to DELTA_APPLY_RETRY_COUNT times. Only if every
// attempt fails with a StatusError do we throw DeltaApplyError; any
// non-status error is rethrown immediately without retrying.
let lastError: StatusError | undefined = undefined;
for (
	let tryCount = 0;
	tryCount < DELTA_APPLY_RETRY_COUNT;
	tryCount++
) {
	try {
		id = await applyBalenaDelta(name, token, onProgress, logFn);
		// A successful attempt supersedes any failure recorded by an earlier
		// attempt; without this reset, a delta that applied on a retry would
		// still be reported as failed below.
		lastError = undefined;
		break;
	} catch (e) {
		if (!isStatusError(e)) {
			throw e;
		}
		// Remember the most recent status error so it can be surfaced if
		// all attempts are exhausted.
		lastError = e;
		logFn(
			`Delta apply failed, retrying (${tryCount + 1}/${DELTA_APPLY_RETRY_COUNT})...`,
		);
	}
}
// lastError is only still set here when no attempt succeeded
if (lastError) {
	const { statusCode, statusMessage } = lastError;
	throw new DeltaApplyError(statusCode, statusMessage);
}
}
break;
default:
throw new Error(`Unsupported delta version: ${deltaOpts.deltaVersion}`);
}
} catch (e) {
// Log appropriate message based on error type
if (e instanceof OutOfSyncError) {
logFn('Falling back to regular pull due to delta out of sync error');
return await fetchImageWithProgress(imgDest, deltaOpts, onProgress);
} else if (e instanceof DeltaServerError) {
logFn(
`Falling back to regular pull due to delta server error (${e.statusCode})${e.statusMessage ? `: ${e.statusMessage}` : ''}`,
);
return await fetchImageWithProgress(imgDest, deltaOpts, onProgress);
} else if (e instanceof DeltaApplyError) {
logFn(
`Falling back to regular pull due to delta apply error (${e.statusCode})${e.statusMessage ? `: ${e.statusMessage}` : ''}`,
);
} else {
logFn(`Delta failed with ${e}`);
throw e;
}

// For handled errors, fall back to regular pull
return fetchImageWithProgress(imgDest, deltaOpts, onProgress);
}

logFn(`Delta applied successfully`);
return id;
return id!;
}

export async function fetchImageWithProgress(
Expand Down
2 changes: 2 additions & 0 deletions src/lib/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ export class DeltaStillProcessingError extends TypedError {}

export class DeltaServerError extends StatusError {}

// Thrown when applying a delta image keeps failing with a status error;
// carries the status code and message of the last failed attempt.
export class DeltaApplyError extends StatusError {}

export class UpdatesLockedError extends TypedError {}

export function isHttpConflictError(err: { statusCode: number }): boolean {
Expand Down

0 comments on commit 37b88f7

Please sign in to comment.