diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65ba0fa..8650f81 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [4.0.1] - 2024-04-11
+### Bugfixes
+- AWS Elemental MediaConvert does not create the frameCapture group when s3://[PROXY_BUCKET]/_settings/aioption.json is missing.
+- Short-form video (5s) fails the analysis.
+- Rephrased the Version Compatibility input parameter on the CFN template to be clearer.
+
+### New features
+- Added Top 5 most relevant tags at the scene level
+
+
 ## [4.0.0] - 2024-03-06
 ### New features
 - Dynamic frame analysis workflow
diff --git a/deployment/media2cloud-backend-stack.yaml b/deployment/media2cloud-backend-stack.yaml
index 1534e4a..43e00b5 100644
--- a/deployment/media2cloud-backend-stack.yaml
+++ b/deployment/media2cloud-backend-stack.yaml
@@ -3519,6 +3519,8 @@ Resources:
             - bOpenSearchServerless
             - 1
             - 0
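+          # surfaced to the ingest Lambda so the create-record state can fall
+          # back to stack-level defaults when no aioptions file exists in S3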
+          ENV_DEFAULT_AI_OPTIONS: !Ref DefaultAIOptions
+          ENV_DEFAULT_MINCONFIDENCE: !Ref DefaultMinConfidence
           ENV_AI_OPTIONS_S3KEY: !Ref AIOptionsS3Key
 
   IngestMainStateMachine:
diff --git a/deployment/media2cloud.yaml b/deployment/media2cloud.yaml
index fb2f622..04a4018 100644
--- a/deployment/media2cloud.yaml
+++ b/deployment/media2cloud.yaml
@@ -161,7 +161,6 @@ Parameters:
   VersionCompatibilityStatement:
     Type: String
    Description: The new Version 4 of Media2Cloud is not compatible with previous versions due to several optimization changes. These changes include the Amazon OpenSearch cluster indexes and consolidation of the generated metadata. While a migration tool is being developed to help customers migrate from previous versions to Version 4, this CloudFormation template SHOULD NOT be used to update your existing Media2Cloud V3 deployment to the latest version. Confirm that you have read and understand the version compatibility statement. If you are creating a new stack, select "Yes, I understand and proceed".
-    Default: No, do not proceed
     AllowedValues:
       - Yes, I understand and proceed
       - No, do not proceed
@@ -171,7 +170,7 @@ Metadata:
     ParameterGroups:
       -
         Label:
-          default: Version Compatibility
+          default: PLEASE READ AND SELECT AN ANSWER
         Parameters:
           - VersionCompatibilityStatement
       -
@@ -214,7 +213,7 @@ Metadata:
           - BedrockModel
     ParameterLabels:
       VersionCompatibilityStatement:
-        default: Please read the following statement
+        default: Version compatibility
       Email:
         default: Email
       PriceClass:
diff --git a/source/api/lib/operations/genai/claude.js b/source/api/lib/operations/genai/claude.js
index c04d9a7..7a08868 100644
--- a/source/api/lib/operations/genai/claude.js
+++ b/source/api/lib/operations/genai/claude.js
@@ -383,7 +383,7 @@ function _createCustomPrompt(options) {
   const transcript = _textInput(options);
   messages.push({
     role: 'user',
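+    // bugfix: append the caller's custom prompt after the transcript;
+    // the previous template dropped options.prompt from the message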
-    content: `Transcript in <transcript> tag:\n<transcript>${transcript}</transcript>\n`,
+    content: `Transcript in <transcript> tag:\n<transcript>${transcript}</transcript>\n\n${options.prompt}`,
   });
 
   messages.push({
diff --git a/source/custom-resources/lib/versionCompatibility/index.js b/source/custom-resources/lib/versionCompatibility/index.js
index 0d1059a..ff0bf04 100644
--- a/source/custom-resources/lib/versionCompatibility/index.js
+++ b/source/custom-resources/lib/versionCompatibility/index.js
@@ -18,7 +18,7 @@ exports.CheckVersionCompatibilityStatement = async (event, context) => {
     return x0.responseData;
   }
 
-  let consent = event.ResourceProperties.Data.VersionCompatibilityStatement;
+  let consent = event.ResourceProperties.Data.VersionCompatibilityStatement || '';
   consent = consent.toLowerCase();
 
   if (consent.startsWith('yes')) {
diff --git a/source/layers/core-lib/lib/.version b/source/layers/core-lib/lib/.version
index fcdb2e1..1454f6e 100644
--- a/source/layers/core-lib/lib/.version
+++ b/source/layers/core-lib/lib/.version
@@ -1 +1 @@
-4.0.0
+4.0.1
diff --git a/source/main/analysis/post-process/states/ad-break/index.js b/source/main/analysis/post-process/states/ad-break/index.js
index 81977d9..9abe457 100644
--- a/source/main/analysis/post-process/states/ad-break/index.js
+++ b/source/main/analysis/post-process/states/ad-break/index.js
@@ -746,7 +746,7 @@ function _bestGuessCandidates(scenes) {
   }
 
   // special case: content does not have end credits
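+  // guard against an empty scene list (e.g. a very short clip);
+  // scenes[scenes.length - 1] below would otherwise throw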
-  if (contentTimestamps[1] < 0) {
+  if (contentTimestamps[1] < 0 && scenes.length > 0) {
     contentTimestamps[1] = scenes[scenes.length - 1].timeEnd;
   }
diff --git a/source/main/analysis/post-process/states/create-scene-taxonomy/index.js b/source/main/analysis/post-process/states/create-scene-taxonomy/index.js
index 8f7ebc3..52e3fde 100644
--- a/source/main/analysis/post-process/states/create-scene-taxonomy/index.js
+++ b/source/main/analysis/post-process/states/create-scene-taxonomy/index.js
@@ -35,8 +35,9 @@ const BaseState = require('../shared/base');
 const MODEL_REGION = process.env.ENV_BEDROCK_REGION;
 const MODEL_ID = process.env.ENV_BEDROCK_MODEL_ID;
 const MODEL_VERSION = process.env.ENV_BEDROCK_MODEL_VER;
-const SYSTEM = 'You are a media operation engineer. Your job is to review a portion of a video content presented by a sequence of consecutive images. Each image also contains a sequence of frames presented in a 4x7 grid reading from left to right and then from top to bottom. You may also optionally be given the dialogues of the scene that helps you to understand the context. You are asked to provide the following information: a detail description to describe the scene, identify the most relevant IAB taxonomy, GARM, sentiment, and brands and logos that may appear in the scene. It is important to return the results in JSON format and also includes a confidence score from 0 to 100. Skip any explanation.';
-const SYSTEM_IAB = 'You are a media operation engineer. Your job is to review a portion of a video content presented by a sequence of consecutive images. Each image also contains a sequence of frames presented in a 4x7 grid reading from left to right and then from top to bottom. You may also optionally be given the dialogues of the scene that helps you to understand the context. You are asked to identify the most relevant IAB taxonomy. It is important to return the results in JSON format and also includes a confidence score from 0 to 100. Skip any explanation.';
+const TASK_ALL = 'You are asked to provide the following information: a detailed description of the scene, the most relevant IAB taxonomy, GARM, sentiment, brands and logos that may appear in the scene, and the five most relevant tags from the scene.';
+const TASK_IAB = 'You are asked to identify the most relevant IAB taxonomy.';
+const SYSTEM = 'You are a media operation engineer. Your job is to review a portion of video content presented as a sequence of consecutive images. Each image contains a sequence of frames presented in a 4x7 grid reading from left to right and then from top to bottom. You may also optionally be given the dialogues of the scene to help you understand the context. {{TASK}} It is important to return the results in JSON format and to include a confidence score from 0 to 100. Skip any explanation.';
 const ASSISTANT = {
   ProvideDialogues: {
     role: 'assistant',
@@ -62,7 +63,7 @@ const MODEL_PARAMS = {
   // top_p: 0.8,
   // top_k: 250,
   stop_sequences: ['\n\nHuman:'],
-  system: SYSTEM,
+  // system: SYSTEM,
 };
 
 const ENABLE_IMAGE_TILE = false;
@@ -701,6 +702,12 @@ async function _inference(
     text: `Here is a list of Sentiments in <sentiment> tag:\n<sentiment>\n${sentiments.join('\n')}\n</sentiment>\nOnly answer the Sentiment from this list.`,
   });
 
+  // tags
+  additional.push({
+    type: 'text',
+    text: 'Also provide the five most relevant tags of the scene.',
+  });
+
   messages.push({
     role: 'user',
     content: additional,
@@ -736,6 +743,12 @@ async function _inference(
         score: 90,
       },
     ],
+    tags: [
+      {
+        text: 'BMW',
+        score: 90,
+      },
+    ],
   };
 
   const output = `Return JSON format. An example of the output:\n${JSON.stringify(example)}\n`;
@@ -747,10 +760,12 @@ async function _inference(
   // assistant
   messages.push(ASSISTANT.Prefill);
 
+  const system = SYSTEM.replace('{{TASK}}', TASK_ALL);
   const modelParams = {
     ...MODEL_PARAMS,
     ...options,
     messages,
+    system,
   };
 
   const response = await _invokeEndpoint(modelId, modelParams);
@@ -1083,7 +1098,7 @@ async function _inferenceRefineIAB(
   // guardrail to only return JSON
   messages.push(ASSISTANT.Prefill);
 
-  const system = SYSTEM_IAB;
+  const system = SYSTEM.replace('{{TASK}}', TASK_IAB);
   const modelId = MODEL_ID;
   const modelParams = {
     ...MODEL_PARAMS,
diff --git a/source/main/analysis/video/states/select-segment-frames/index.js b/source/main/analysis/video/states/select-segment-frames/index.js
index 1868670..b594e0a 100644
--- a/source/main/analysis/video/states/select-segment-frames/index.js
+++ b/source/main/analysis/video/states/select-segment-frames/index.js
@@ -116,6 +116,10 @@ class StateSelectSegmentFrames {
       segments
     );
 
+    if (frameSegmentation.length === 0) {
+      throw new AnalysisError('no frame selected');
+    }
+
     console.log(`[INFO]: StateSelectSegmentFrames.process: ${frameSegmentation.length} out of ${framesExtracted}`);
 
     const {
diff --git a/source/main/analysis/video/states/select-segment-frames/selectionHelper.js b/source/main/analysis/video/states/select-segment-frames/selectionHelper.js
index 62f7991..b08318b 100644
--- a/source/main/analysis/video/states/select-segment-frames/selectionHelper.js
+++ b/source/main/analysis/video/states/select-segment-frames/selectionHelper.js
@@ -7,7 +7,7 @@ const Jimp = require('jimp');
 const TYPE_STEADY = ['ColorBars', 'BlackFrames', 'StudioLogo', 'Slate'];
 const TYPE_CREDITS = ['EndCredits'];
 const TYPE_OPENING = ['OpeningCredits'];
-const TYPE_CONTENT = ['Content'];
+const TYPE_CONTENT = ['Content', 'undefined'];
 const HAMMING_DISTANCE_THRESHOLD = 0.85;
 const SPLIT_INTERVAL = 2 * 60 * 1000; // 2min
 const SAMPLING_INTERVAL = 3 * 1000; // 3s
@@ -48,6 +48,18 @@ function _withShotSegment(frameHashes, segments) {
     }
   });
 
+  // special case: potentially short form video. Fake the technicalCue.
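+  // Rekognition may return no technical cues at all for very short clips;
+  // wrap each shot in a synthetic 'undefined' cue so the TYPE_CONTENT
+  // selection path (which now accepts 'undefined') still picks frames.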
+  if (technicalCues.length === 0) {
+    shotSegments.forEach((shotSegment) => {
+      technicalCues.push({
+        ShotSegmentRange: [shotSegment.ShotSegment.Index, shotSegment.ShotSegment.Index],
+        TechnicalCueSegment: {
+          Type: 'undefined',
+        },
+      });
+    });
+  }
+
   let selected = [];
   let shotIdx = 0;
@@ -178,7 +190,6 @@
       Math.round((send - ssta) / SAMPLING_INTERVAL),
       1
     );
-
     selected = _selectByScanning(shotSegmentFrames, maxFrames);
   } else {
     console.log(`[INFO]: [#${shotIdx}]: ${technicalCueType}: not supported`);
diff --git a/source/main/ingest/main/states/create-record/index.js b/source/main/ingest/main/states/create-record/index.js
index 7d03a44..baf6a47 100644
--- a/source/main/ingest/main/states/create-record/index.js
+++ b/source/main/ingest/main/states/create-record/index.js
@@ -3,6 +3,7 @@
 const PATH = require('path');
 
 const {
+  aimlGetPresets,
   DB,
   CommonUtils,
   MimeTypeHelper,
@@ -13,6 +14,7 @@ const {
   IngestError,
 } = require('core-lib');
 
+const DEFAULT_AI_OPTIONS = process.env.ENV_DEFAULT_AI_OPTIONS;
 const AI_OPTIONS_S3KEY = process.env.ENV_AI_OPTIONS_S3KEY;
 
 class StateCreateRecord {
@@ -138,6 +140,11 @@ class StateCreateRecord {
         undefined);
     }
 
+    // load from environment variable
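+    // fallback: when no aioptions file exists under
+    // s3://[PROXY_BUCKET]/_settings/, options stays undefined and the
+    // frameCapture group is never configured; use the stack defaults instead.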
+    if (!options) {
+      options = aimlGetPresets(DEFAULT_AI_OPTIONS);
+    }
+
     /* auto select frameCaptureMode if not defined */
     if (options
       && options[AnalysisTypes.Rekognition.CustomLabel]
diff --git a/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/adbreak/adbreakTab.js b/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/adbreak/adbreakTab.js
index b42a33a..e419b04 100644
--- a/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/adbreak/adbreakTab.js
+++ b/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/adbreak/adbreakTab.js
@@ -463,6 +463,16 @@ export default class AdBreakTab extends BaseAnalysisTab {
         .addClass('lead-s b-300');
       section.append(ulBrandAndLogos);
 
+      // Tags
+      const tags = $('<span/>')
+        .addClass('b-300 mr-4')
+        .append('Top 5 relevant tags');
+      section.append(tags);
+
+      const ulTags = $('<ul/>')
+        .addClass('lead-s b-300');
+      section.append(ulTags);
+
       // Label category
       const labelCategory = $('<span/>')
         .addClass('b-300 mr-4')
@@ -510,6 +520,17 @@ export default class AdBreakTab extends BaseAnalysisTab {
           }
         });
       }
+
+      // tags
+      if ((x.tags || []).length > 0) {
+        x.tags.forEach((item) => {
+          if (item.text) {
+            const li = $('<li/>')
+              .append(`${item.text} (${item.score}%)`);
+            ulTags.append(li);
+          }
+        });
+      }
     });
 
     contextual.forEach((x) => {
diff --git a/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/image/imageCaption.js b/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/image/imageCaption.js
index 1d45e8e..afe0b27 100644
--- a/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/image/imageCaption.js
+++ b/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/image/imageCaption.js
@@ -1,9 +1,29 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
+import SolutionManifest from '/solution-manifest.js';
 import Localization from '../../../../../../../shared/localization.js';
 import BaseAnalysisTab from '../../base/baseAnalysisTab.js';
 
+const {
+  FoundationModels = [],
+} = SolutionManifest;
+
+const {
+  name: MODEL_NAME = '',
+  value: MODEL_ID = '',
+} = FoundationModels[0] || {};
+
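+// USD per 1,000 tokens. The rates below match Claude 3 Sonnet ($0.003 input,
+// $0.015 output) and Claude 3 Haiku ($0.00025 input, $0.00125 output); any
+// other first model in FoundationModels falls through to the Haiku rates.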
+const MODEL_PRICING = (MODEL_ID.indexOf('sonnet') > 0)
+  ? {
+    InputTokens: 0.00300,
+    OutputTokens: 0.01500,
+  }
+  : {
+    InputTokens: 0.00025,
+    OutputTokens: 0.00125,
+  };
+
 const {
   Messages: {
     ImageCaptionTab: TITLE,
@@ -46,6 +66,10 @@ export default class ImageCaptionTab extends BaseAnalysisTab {
       output = JSON.parse(output);
 
       const {
+        usage: {
+          inputTokens,
+          outputTokens,
+        },
         description,
         altText,
         fileName,
@@ -92,6 +116,16 @@ export default class ImageCaptionTab extends BaseAnalysisTab {
           }
         }
       });
+
+      // usage
+      const estimatedCost = ((
+        (inputTokens * MODEL_PRICING.InputTokens) +
+        (outputTokens * MODEL_PRICING.OutputTokens)
+      ) / 1000).toFixed(4);
+
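+      // MODEL_PRICING is quoted per 1,000 tokens, hence the division by 1000.
+      // e.g. 2,000 input + 400 output tokens on Sonnet:
+      // ((2000 * 0.003) + (400 * 0.015)) / 1000 => "0.0120"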
+      const p = $('<p/>')
+        .append(`(Total of ${inputTokens} input tokens and ${outputTokens} output tokens using ${MODEL_NAME}. Estimated cost is $${estimatedCost}.)`);
+      container.append(p);
     }
 
     return container;
diff --git a/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/video/segmentTab.js b/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/video/segmentTab.js
index 57c599b..1dd7368 100644
--- a/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/video/segmentTab.js
+++ b/source/webapp/src/lib/js/app/mainView/collection/base/components/analysis/rekognition/video/segmentTab.js
@@ -354,6 +354,19 @@ export default class SegmentTab extends BaseRekognitionTab {
         .addClass('lead-s b-300');
       sectionBrandAndLogos.append(ulBrandAndLogos);
 
+      // tags
+      const sectionTags = $('<section/>');
+      sceneDescView.append(sectionTags);
+
+      desc = $('<span/>')
+        .addClass('b-400 mr-4')
+        .append('Top 5 relevant tags');
+      sectionTags.append(desc);
+
+      const ulTags = $('<ul/>')
+        .addClass('lead-s b-300');
+      sectionTags.append(ulTags);
+
       item.details.forEach((x) => {
         let li;
         if ((x.description || {}).text) {
@@ -393,6 +406,17 @@ export default class SegmentTab extends BaseRekognitionTab {
             }
           });
         }
+
+        // tags
+        if ((x.tags || []).length > 0) {
+          x.tags.forEach((_item) => {
+            if (_item.text) {
+              li = $('<li/>')
+                .append(`${_item.text} (${_item.score}%)`);
+              ulTags.append(li);
+            }
+          });
+        }
       });
     }