Skip to content

Commit

Permalink
Fixes an issue that occurred when decoding audio samples using the VideoDecoder.
Browse files Browse the repository at this point in the history

Signed-off-by: Ralph Gasser <[email protected]>
  • Loading branch information
ppanopticon committed Apr 19, 2024
1 parent 63e8b99 commit 76e010e
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,20 @@ import java.nio.ShortBuffer
interface AudioContent: ContentElement<ShortBuffer> {
/** Number of samples encoded in this [AudioContent]. */
val samples: Int
get() = this.content.limit()
get() = this.content.limit() / this.channels

/** The number of samples encoded in this [AudioContent]. */
val channel: Int
/** The size of this [AudioContent] in bytes. */
val size: Int
get() = this.content.limit() * Short.SIZE_BYTES

/** The number of channels encoded in this [AudioContent]. */
val channels: Short

/** The sampling rate of the data encoded in this [AudioContent]. */
val samplingRate: Int

/** The [ContentType] of an [AudioContent] is always [ContentType.AUDIO_FRAME]. */
override val type: ContentType
get() = ContentType.AUDIO_FRAME
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ class CachedContentFactory : ContentFactory, AutoCloseable {
return content
}

override fun newAudioContent(channel: Int, samplingRate: Int, audio: ShortBuffer): AudioContent {
override fun newAudioContent(channels: Short, sampleRate: Int, audio: ShortBuffer): AudioContent {
check(!this.closed) { "CachedContentFactory has been closed." }
val content = CachedAudioContent(this.nextPath(), channel, samplingRate, audio)
val content = CachedAudioContent(this.nextPath(), channels, sampleRate, audio)
this.refSet.add(CachedItem(content, this.referenceQueue))
return content
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import java.nio.ShortBuffer
interface ContentFactory {
fun newImageContent(bufferedImage: BufferedImage): ImageContent

fun newAudioContent(channel: Int, samplingRate: Int, audio: ShortBuffer): AudioContent
fun newAudioContent(channels: Short, sampleRate: Int, audio: ShortBuffer): AudioContent

fun newTextContent(text: String): TextContent

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class InMemoryContentFactory : ContentFactory {

override fun newImageContent(bufferedImage: BufferedImage): ImageContent = InMemoryImageContent(bufferedImage)

override fun newAudioContent(channel: Int, samplingRate: Int, audio: ShortBuffer): AudioContent = InMemoryAudioContent(channel, samplingRate, audio)
override fun newAudioContent(channels: Short, sampleRate: Int, audio: ShortBuffer): AudioContent = InMemoryAudioContent(channels, sampleRate, audio)

override fun newTextContent(text: String): TextContent = InMemoryTextContent(text)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ import java.nio.file.StandardOpenOption
* @author Ralph Gasser
* @version 1.0.0
*/
class CachedAudioContent(override val path: Path, override val channel: Int, override val samplingRate: Int, buffer: ShortBuffer) : AudioContent, CachedContent {
class CachedAudioContent(override val path: Path, override val channels: Short, override val samplingRate: Int, buffer: ShortBuffer) : AudioContent, CachedContent {
/** The [SoftReference] of the [ShortBuffer] used for caching. */
private var reference: SoftReference<ShortBuffer> = SoftReference(buffer)

/** The number of samples contained in this [CachedAudioContent]. */
override val samples: Int = buffer.limit() / this.channel
override val samples: Int = buffer.limit() / this.channels

/** The size of this [CachedAudioContent] in bytes. */
override val size: Int = buffer.limit() * Short.SIZE_BYTES

/** The audio samples contained in this [CachedAudioContent]. */
override val content: ShortBuffer
Expand All @@ -34,9 +37,9 @@ class CachedAudioContent(override val path: Path, override val channel: Int, ove
}

init {
val outBuffer = ByteBuffer.allocate(this.samples * 2).order(ByteOrder.LITTLE_ENDIAN)
for (i in 0 until this.samples) {
outBuffer.putShort(buffer.get())
val outBuffer = ByteBuffer.allocate(this.size).order(ByteOrder.LITTLE_ENDIAN)
for (i in 0 until buffer.limit()) {
outBuffer.putShort(buffer.get(i))
}
Files.newByteChannel(this.path, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE).use {
it.write(outBuffer.flip())
Expand All @@ -49,10 +52,8 @@ class CachedAudioContent(override val path: Path, override val channel: Int, ove
* @return [ShortBuffer]
*/
private fun reload(): ShortBuffer {
val buffer = ByteBuffer.allocate(this.samples * 2).order(ByteOrder.LITTLE_ENDIAN)
Files.newByteChannel(this.path, StandardOpenOption.READ).use {
it.read(buffer)
}
val buffer = ByteBuffer.allocate(this.size).order(ByteOrder.LITTLE_ENDIAN)
Files.newByteChannel(this.path, StandardOpenOption.READ).use { it.read(buffer) }
return buffer.flip().asShortBuffer()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ import java.nio.ShortBuffer
* @author Ralph Gasser
* @version 1.0.0
*/
data class InMemoryAudioContent(override val channel: Int, override val samplingRate: Int, override val content: ShortBuffer) : AudioContent
data class InMemoryAudioContent(override val channels: Short, override val samplingRate: Int, override val content: ShortBuffer) : AudioContent
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.flow.filter
import org.bytedeco.javacv.FFmpegFrameGrabber
import org.bytedeco.javacv.Frame
import org.bytedeco.javacv.FrameGrabber
import org.bytedeco.javacv.Java2DFrameConverter
import org.vitrivr.engine.core.context.IndexContext
import org.vitrivr.engine.core.model.content.Content
Expand Down Expand Up @@ -79,6 +80,11 @@ class VideoDecoder : DecoderFactory {
var audioCounter = 0
source.newInputStream().use { input ->
FFmpegFrameGrabber(input).use { grabber ->

/* Configure FFmpegFrameGrabber. */
grabber.imageMode = FrameGrabber.ImageMode.COLOR
grabber.sampleMode = FrameGrabber.SampleMode.SHORT

logger.info { "Start decoding source ${source.name} (${source.sourceId})" }
try {
grabber.start()
Expand All @@ -93,6 +99,7 @@ class VideoDecoder : DecoderFactory {

/* Start extraction of frames. */
var frame = grabber.grabFrame(this@Instance.audio, this@Instance.video, true, this@Instance.keyFrames, true)

while (frame != null) {
when (frame.type) {
Frame.Type.VIDEO -> if ((videoCounter++) % this@Instance.sampleVideo == 0) {
Expand Down Expand Up @@ -144,18 +151,21 @@ class VideoDecoder : DecoderFactory {
* @param channel The [ProducerScope] to send [ContentElement]s to.
*/
private suspend fun emitAudioContent(frame: Frame, source: Source, channel: ProducerScope<ContentElement<*>>) {
for ((c, s) in frame.samples.withIndex()) {
val normalizedSamples = when (s) {
is ShortBuffer -> s
else -> ShortBuffer.allocate(0)/* TODO: Cover other cases. */
}
val timestampNs: Long = frame.timestamp * 1000 // Convert microseconds to nanoseconds
val audio = this.context.contentFactory.newAudioContent(c, frame.sampleRate, normalizedSamples)
channel.send(object : AudioContent by audio, SourcedContent.Temporal {
override val source: Source = source
override val timepointNs: Long = timestampNs
})
val timestampNs: Long = frame.timestamp * 1000 // Convert microseconds to nanoseconds

/* Copy audio samples (important)! */
val samples = (frame.samples.firstOrNull() as? ShortBuffer)?.let { ShortBuffer.allocate(it.limit()).put(it).flip() }
if (samples == null) {
logger.warn { "Audio frame at timestamp $timestampNs did not contain any audio samples." }
return
}

/* Create and emit audio content. */
val audio = this.context.contentFactory.newAudioContent(frame.audioChannels.toShort(), frame.sampleRate, samples)
channel.send(object : AudioContent by audio, SourcedContent.Temporal {
override val source: Source = source
override val timepointNs: Long = timestampNs
})
}
}
}

0 comments on commit 76e010e

Please sign in to comment.