
Commit 8f2d05f

Merge pull request #53 from aws-solutions-library-samples/52-video-analysis-fails-for-short-form-video-5-seconds-long

bugfixes: #51, #52
aws-kens authored Apr 11, 2024
2 parents c0dc3d6 + 32261a7 commit 8f2d05f
Showing 14 changed files with 140 additions and 13 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [4.0.1] - 2024-04-11
### Bugfixes
- AWS Elemental MediaConvert does not create the frameCapture group when s3://[PROXY_BUCKET]/_settings/aioption.json is missing.
- Short-form video (5s) fails the analysis workflow.
- Rephrased the Version Compatibility input parameter in the CloudFormation template to be clearer.

### New features
- Added the top 5 most relevant tags at the scene level


## [4.0.0] - 2024-03-06
### New features
- Dynamic frame analysis workflow
2 changes: 2 additions & 0 deletions deployment/media2cloud-backend-stack.yaml
@@ -3519,6 +3519,8 @@ Resources:
- bOpenSearchServerless
- 1
- 0
ENV_DEFAULT_AI_OPTIONS: !Ref DefaultAIOptions
ENV_DEFAULT_MINCONFIDENCE: !Ref DefaultMinConfidence
ENV_AI_OPTIONS_S3KEY: !Ref AIOptionsS3Key

IngestMainStateMachine:
5 changes: 2 additions & 3 deletions deployment/media2cloud.yaml
@@ -161,7 +161,6 @@ Parameters:
VersionCompatibilityStatement:
Type: String
Description: The new Version 4 of Media2Cloud is not compatible with previous versions due to several optimization changes. These changes include the Amazon OpenSearch cluster indexes and consolidation of the generated metadata. While a migration tool is being developed to help customers migrate from previous versions to Version 4, this CloudFormation template SHOULD NOT be used to update your existing Media2Cloud V3 deployment to the latest version. Confirm that you have read and understand the version compatibility statement. If you are creating a new stack, select "Yes, I understand and proceed".
Default: No, do not proceed
AllowedValues:
- Yes, I understand and proceed
- No, do not proceed
@@ -171,7 +170,7 @@ Metadata:
ParameterGroups:
-
Label:
default: Version Compatibility
default: PLEASE READ AND SELECT AN ANSWER
Parameters:
- VersionCompatibilityStatement
-
@@ -214,7 +213,7 @@ Metadata:
- BedrockModel
ParameterLabels:
VersionCompatibilityStatement:
default: Please read the following statement
default: Version compatibility
Email:
default: Email
PriceClass:
2 changes: 1 addition & 1 deletion source/api/lib/operations/genai/claude.js
@@ -383,7 +383,7 @@ function _createCustomPrompt(options) {
const transcript = _textInput(options);
messages.push({
role: 'user',
content: `Transcript in <transcript> tag:\n<transcript>${transcript}\n</transcript>`,
content: `Transcript in <transcript> tag:\n<transcript>${transcript}\n</transcript>\n${options.prompt}`,
});

messages.push({
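The one-line change above appends `options.prompt` after the transcript block in the custom-prompt message. A minimal sketch of the resulting message content, using hypothetical values for the transcript and prompt:

```js
// Hypothetical inputs for illustration only.
const options = { prompt: 'Summarize the key moments of this episode.' };
const transcript = 'Welcome back to the show. Today we are talking about...';

// The fixed template carries both the transcript and the caller's custom prompt.
const content = `Transcript in <transcript> tag:\n<transcript>${transcript}\n</transcript>\n${options.prompt}`;

console.log(content);
// Before this change, the template ended at </transcript>, so options.prompt
// was not part of this message.
```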
2 changes: 1 addition & 1 deletion source/custom-resources/lib/versionCompatibility/index.js
@@ -18,7 +18,7 @@ exports.CheckVersionCompatibilityStatement = async (event, context) => {
return x0.responseData;
}

let consent = event.ResourceProperties.Data.VersionCompatibilityStatement;
let consent = event.ResourceProperties.Data.VersionCompatibilityStatement || '';
consent = consent.toLowerCase();

if (consent.startsWith('yes')) {
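The `|| ''` fallback above guards against the statement being absent from the resource properties. A minimal sketch of the failure mode it prevents, with a hypothetical payload:

```js
// Hypothetical event payload where the statement is missing.
const data = {};

let consent = data.VersionCompatibilityStatement;       // undefined
// consent.toLowerCase();                               // would throw: Cannot read properties of undefined

consent = (data.VersionCompatibilityStatement || '');    // the fix: default to an empty string
consent = consent.toLowerCase();

console.log(consent.startsWith('yes'));                  // false -> the 'yes' branch is simply not taken
```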
2 changes: 1 addition & 1 deletion source/layers/core-lib/lib/.version
@@ -1 +1 @@
4.0.0
4.0.1
2 changes: 1 addition & 1 deletion source/main/analysis/post-process/states/ad-break/index.js
@@ -746,7 +746,7 @@ function _bestGuessCandidates(scenes) {
}

// special case: content does not have end credits
if (contentTimestamps[1] < 0) {
if (contentTimestamps[1] < 0 && scenes.length > 0) {
contentTimestamps[1] = scenes[scenes.length - 1].timeEnd;
}

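The added `scenes.length > 0` check above matters for very short content: if scene detection returns an empty list, indexing the last element would fail. A minimal sketch with hypothetical data:

```js
// Hypothetical: no end credits detected and no scenes at all (e.g. a ~5s clip).
const contentTimestamps = [0, -1];
const scenes = [];

// Without the length check, scenes[scenes.length - 1] is scenes[-1], which is
// undefined, so reading .timeEnd would throw a TypeError.
if (contentTimestamps[1] < 0 && scenes.length > 0) {
  contentTimestamps[1] = scenes[scenes.length - 1].timeEnd;
}

console.log(contentTimestamps); // [0, -1] -> left untouched instead of crashing
```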
@@ -35,8 +35,9 @@ const BaseState = require('../shared/base');
const MODEL_REGION = process.env.ENV_BEDROCK_REGION;
const MODEL_ID = process.env.ENV_BEDROCK_MODEL_ID;
const MODEL_VERSION = process.env.ENV_BEDROCK_MODEL_VER;
const SYSTEM = 'You are a media operation engineer. Your job is to review a portion of a video content presented by a sequence of consecutive images. Each image also contains a sequence of frames presented in a 4x7 grid reading from left to right and then from top to bottom. You may also optionally be given the dialogues of the scene that helps you to understand the context. You are asked to provide the following information: a detail description to describe the scene, identify the most relevant IAB taxonomy, GARM, sentiment, and brands and logos that may appear in the scene. It is important to return the results in JSON format and also includes a confidence score from 0 to 100. Skip any explanation.';
const SYSTEM_IAB = 'You are a media operation engineer. Your job is to review a portion of a video content presented by a sequence of consecutive images. Each image also contains a sequence of frames presented in a 4x7 grid reading from left to right and then from top to bottom. You may also optionally be given the dialogues of the scene that helps you to understand the context. You are asked to identify the most relevant IAB taxonomy. It is important to return the results in JSON format and also includes a confidence score from 0 to 100. Skip any explanation.';
const TASK_ALL = 'You are asked to provide the following information: a detail description to describe the scene, identify the most relevant IAB taxonomy, GARM, sentiment, and brands and logos that may appear in the scene, and five most relevant tags from the scene.';
const TASK_IAB = 'You are asked to identify the most relevant IAB taxonomy.';
const SYSTEM = 'You are a media operation engineer. Your job is to review a portion of a video content presented by a sequence of consecutive images. Each image also contains a sequence of frames presented in a 4x7 grid reading from left to right and then from top to bottom. You may also optionally be given the dialogues of the scene that helps you to understand the context. {{TASK}} It is important to return the results in JSON format and also includes a confidence score from 0 to 100. Skip any explanation.';
const ASSISTANT = {
ProvideDialogues: {
role: 'assistant',
@@ -62,7 +63,7 @@ const MODEL_PARAMS = {
// top_p: 0.8,
// top_k: 250,
stop_sequences: ['\n\nHuman:'],
system: SYSTEM,
// system: SYSTEM,
};

const ENABLE_IMAGE_TILE = false;
@@ -701,6 +702,12 @@ async function _inference(
text: `Here is a list of Sentiments in <sentiment> tag:\n<sentiment>\n${sentiments.join('\n')}\n</sentiment>\nOnly answer the Sentiment from this list.`,
});

// tags
additional.push({
type: 'text',
text: 'Also provide five most relevant tags of the scene.',
});

messages.push({
role: 'user',
content: additional,
@@ -736,6 +743,12 @@ async function _inference(
score: 90,
},
],
tags: [
{
text: 'BMW',
score: 90,
},
],
};

const output = `Return JSON format. An example of the output:\n${JSON.stringify(example)}\n`;
@@ -747,10 +760,12 @@
// assistant
messages.push(ASSISTANT.Prefill);

const system = SYSTEM.replace('{{TASK}}', TASK_ALL);
const modelParams = {
...MODEL_PARAMS,
...options,
messages,
system,
};

const response = await _invokeEndpoint(modelId, modelParams);
@@ -1083,7 +1098,7 @@ async function _inferenceRefineIAB(
// guardrail to only return JSON
messages.push(ASSISTANT.Prefill);

const system = SYSTEM_IAB;
const system = SYSTEM.replace('{{TASK}}', TASK_IAB);
const modelId = MODEL_ID;
const modelParams = {
...MODEL_PARAMS,
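The constants above replace two near-duplicate prompts (SYSTEM and SYSTEM_IAB) with a single SYSTEM template carrying a {{TASK}} placeholder, and `system` moves out of the shared MODEL_PARAMS so each inference call injects its own task. A minimal sketch of the pattern, with abridged prompt text and assumed shared parameter values:

```js
// Abridged prompt text and assumed shared parameters, for illustration only.
const SYSTEM = 'You are a media operation engineer. ... {{TASK}} It is important to return the results in JSON format.';
const TASK_ALL = 'Provide a scene description, the most relevant IAB taxonomy, GARM, sentiment, brands and logos, and the five most relevant tags.';
const TASK_IAB = 'Identify the most relevant IAB taxonomy.';

// `system` is no longer baked into the shared defaults.
const MODEL_PARAMS = { anthropic_version: 'bedrock-2023-05-31', max_tokens: 4096 };

function buildParams(task, messages) {
  return {
    ...MODEL_PARAMS,
    messages,
    system: SYSTEM.replace('{{TASK}}', task), // specialized per inference call
  };
}

// Full scene analysis (which now also asks for tags) vs. the IAB refinement pass:
const paramsAll = buildParams(TASK_ALL, [{ role: 'user', content: 'scene frames and dialogues go here' }]);
const paramsIab = buildParams(TASK_IAB, [{ role: 'user', content: 'scene frames go here' }]);
console.log(paramsAll.system === paramsIab.system); // false -- one template, two tasks
```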
@@ -116,6 +116,10 @@ class StateSelectSegmentFrames {
segments
);

if (frameSegmentation.length === 0) {
throw new AnalysisError('no frame being selected');
}

console.log(`[INFO]: StateSelectSegmentFrames.process: ${frameSegmentation.length} out of ${framesExtracted}`);

const {
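The guard added above turns an empty frame selection into an explicit failure instead of letting later states receive nothing to analyze. A minimal sketch, with a stand-in for the project's error class (assumed to come from core-lib):

```js
// Stand-in for the project's error class, for illustration only.
class AnalysisError extends Error {}

const frameSegmentation = []; // hypothetical: nothing selected for a ~5s clip

try {
  if (frameSegmentation.length === 0) {
    throw new AnalysisError('no frame being selected');
  }
} catch (e) {
  console.log(e instanceof AnalysisError, e.message); // true 'no frame being selected'
}
```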
@@ -7,7 +7,7 @@ const Jimp = require('jimp');
const TYPE_STEADY = ['ColorBars', 'BlackFrames', 'StudioLogo', 'Slate'];
const TYPE_CREDITS = ['EndCredits'];
const TYPE_OPENING = ['OpeningCredits'];
const TYPE_CONTENT = ['Content'];
const TYPE_CONTENT = ['Content', 'undefined'];
const HAMMING_DISTANCE_THRESHOLD = 0.85;
const SPLIT_INTERVAL = 2 * 60 * 1000; // 2min
const SAMPLING_INTERVAL = 3 * 1000; // 3s
@@ -48,6 +48,18 @@ function _withShotSegment(frameHashes, segments) {
}
});

// special case: potentially short form video. Fake the technicalCue.
if (technicalCues.length === 0) {
shotSegments.forEach((shotSegment) => {
technicalCues.push({
ShotSegmentRange: [shotSegment.ShotSegment.Index, shotSegment.ShotSegment.Index],
TechnicalCueSegment: {
Type: 'undefined',
},
});
});
}

let selected = [];
let shotIdx = 0;

@@ -178,7 +190,6 @@ function _selectFromShotSegment(
Math.round((send - ssta) / SAMPLING_INTERVAL),
1
);

selected = _selectByScanning(shotSegmentFrames, maxFrames);
} else {
console.log(`[INFO]: [#${shotIdx}]: ${technicalCueType}: not supported`);
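The two changes above work together for short-form clips: `TYPE_CONTENT` now also accepts the synthetic 'undefined' cue type, and when Amazon Rekognition returns shot segments but no technical cues at all, each shot is wrapped in a fake cue so frame selection still runs. A minimal sketch with hypothetical shot data:

```js
// Hypothetical: a ~5s clip where Rekognition detected shots but no TECHNICAL_CUE segments.
const shotSegments = [
  { ShotSegment: { Index: 0 } },
  { ShotSegment: { Index: 1 } },
];
const technicalCues = [];

// The fallback from the diff: fabricate one cue per shot with Type 'undefined'.
if (technicalCues.length === 0) {
  shotSegments.forEach((shotSegment) => {
    technicalCues.push({
      ShotSegmentRange: [shotSegment.ShotSegment.Index, shotSegment.ShotSegment.Index],
      TechnicalCueSegment: { Type: 'undefined' },
    });
  });
}

// Because TYPE_CONTENT now includes 'undefined', the synthetic cues are treated
// like regular content when frames are selected per shot.
const TYPE_CONTENT = ['Content', 'undefined'];
console.log(technicalCues.every((cue) => TYPE_CONTENT.includes(cue.TechnicalCueSegment.Type))); // true
```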
7 changes: 7 additions & 0 deletions source/main/ingest/main/states/create-record/index.js
@@ -3,6 +3,7 @@

const PATH = require('path');
const {
aimlGetPresets,
DB,
CommonUtils,
MimeTypeHelper,
@@ -13,6 +14,7 @@ const {
IngestError,
} = require('core-lib');

const DEFAULT_AI_OPTIONS = process.env.ENV_DEFAULT_AI_OPTIONS;
const AI_OPTIONS_S3KEY = process.env.ENV_AI_OPTIONS_S3KEY;

class StateCreateRecord {
@@ -138,6 +140,11 @@ class StateCreateRecord {
undefined);
}

// load from environment variable
if (!options) {
options = aimlGetPresets(DEFAULT_AI_OPTIONS);
}

/* auto select frameCaptureMode if not defined */
if (options
&& options[AnalysisTypes.Rekognition.CustomLabel]
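The hunk above gives ingestion a last-resort source of AI options: if nothing was loaded earlier (for example when s3://[PROXY_BUCKET]/_settings/aioption.json is missing, bug #51), the defaults passed through the new ENV_DEFAULT_AI_OPTIONS variable are used. A minimal sketch of that fallback, with a stubbed `aimlGetPresets` (the real helper comes from core-lib; its behavior here is assumed):

```js
// Stub of core-lib's aimlGetPresets, assumed to expand a preset name into an options object.
const aimlGetPresets = (name) => ({ preset: name || 'default', celeb: true, label: true });

const DEFAULT_AI_OPTIONS = process.env.ENV_DEFAULT_AI_OPTIONS; // wired in by the backend-stack change above

// Hypothetical: nothing came with the request and aioption.json is missing.
let options;

if (!options) {
  options = aimlGetPresets(DEFAULT_AI_OPTIONS);
}

console.log(options); // ingest no longer depends on aioption.json being present
```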
@@ -463,6 +463,16 @@ export default class AdBreakTab extends BaseAnalysisTab {
.addClass('lead-s b-300');
section.append(ulBrandAndLogos);

// Tags
const tags = $('<p/>')
.addClass('b-300 mr-4')
.append('Top 5 relevant tags');
section.append(tags);

const ulTags = $('<ul/>')
.addClass('lead-s b-300');
section.append(ulTags);

// Label category
const labelCategory = $('<p/>')
.addClass('b-300 mr-4')
@@ -510,6 +520,17 @@ export default class AdBreakTab extends BaseAnalysisTab {
}
});
}

// tags
if ((x.tags || []).length > 0) {
x.tags.forEach((item) => {
if (item.text) {
const li = $('<li/>')
.append(`${item.text} (${item.score}%)`);
ulTags.append(li);
}
});
}
});

contextual.forEach((x) => {
@@ -1,9 +1,29 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

import SolutionManifest from '/solution-manifest.js';
import Localization from '../../../../../../../shared/localization.js';
import BaseAnalysisTab from '../../base/baseAnalysisTab.js';

const {
FoundationModels = [],
} = SolutionManifest;

const {
name: MODEL_NAME = '',
value: MODEL_ID = '',
} = FoundationModels[0] || {};

const MODEL_PRICING = (MODEL_ID.indexOf('sonnet') > 0)
? {
InputTokens: 0.00300,
OutputTokens: 0.01500,
}
: {
InputTokens: 0.00025,
OutputTokens: 0.00125,
};

const {
Messages: {
ImageCaptionTab: TITLE,
@@ -46,6 +66,10 @@ export default class ImageCaptionTab extends BaseAnalysisTab {
output = JSON.parse(output);

const {
usage: {
inputTokens,
outputTokens,
},
description,
altText,
fileName,
@@ -92,6 +116,16 @@ export default class ImageCaptionTab extends BaseAnalysisTab {
}
}
});

// usage
const estimatedCost = ((
(inputTokens * MODEL_PRICING.InputTokens) +
(outputTokens * MODEL_PRICING.OutputTokens)
) / 1000).toFixed(4);

const p = $('<p/>')
.append(`(Total of <code>${inputTokens}</code> input tokens and <code>${outputTokens}</code> output tokens using ${MODEL_NAME}. Estimated cost is <code>$${estimatedCost}</code>.)`);
container.append(p);
}

return container;
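For reference, a worked example of the estimate added above, using hypothetical token counts and the Sonnet rates from the diff (rates are USD per 1,000 tokens, hence the division by 1,000):

```js
// Hypothetical token usage for one image caption.
const inputTokens = 1200;
const outputTokens = 300;

// Rates from the diff for a 'sonnet' model id (USD per 1K tokens).
const MODEL_PRICING = { InputTokens: 0.00300, OutputTokens: 0.01500 };

const estimatedCost = ((
  (inputTokens * MODEL_PRICING.InputTokens) +
  (outputTokens * MODEL_PRICING.OutputTokens)
) / 1000).toFixed(4);

console.log(estimatedCost); // "0.0081" -> (1200 * 0.003 + 300 * 0.015) / 1000
```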
@@ -354,6 +354,19 @@ export default class SegmentTab extends BaseRekognitionTab {
.addClass('lead-s b-300');
sectionBrandAndLogos.append(ulBrandAndLogos);

// tags
const sectionTags = $('<section/>');
sceneDescView.append(sectionTags);

desc = $('<p/>')
.addClass('b-400 mr-4')
.append('Top 5 relevant tags');
sectionTags.append(desc);

const ulTags = $('<ul/>')
.addClass('lead-s b-300');
sectionTags.append(ulTags);

item.details.forEach((x) => {
let li;
if ((x.description || {}).text) {
@@ -393,6 +406,17 @@ export default class SegmentTab extends BaseRekognitionTab {
}
});
}

// tags
if ((x.tags || []).length > 0) {
x.tags.forEach((_item) => {
if (_item.text) {
li = $('<li/>')
.append(`${_item.text} (${_item.score}%)`);
ulTags.append(li);
}
});
}
});
}

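Both UI hunks (AdBreakTab earlier and this SegmentTab change) consume the same scene-level `tags` array produced by the backend change in this commit. A minimal sketch of the expected shape and how each entry is rendered, with hypothetical values:

```js
// Hypothetical scene detail following the backend's example output shape.
const sceneDetail = {
  tags: [
    { text: 'BMW', score: 90 },
    { text: 'city driving', score: 84 },
  ],
};

// Mirrors the rendering in AdBreakTab and SegmentTab: one list item per tag,
// formatted as "<text> (<score>%)".
(sceneDetail.tags || []).forEach((item) => {
  if (item.text) {
    console.log(`${item.text} (${item.score}%)`);
  }
});
// BMW (90%)
// city driving (84%)
```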
