Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Parse MPEG-TS PMT ES language and maximum bitrate descriptors (#369) #1311

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packager/media/base/audio_stream_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ class AudioStreamInfo : public StreamInfo {
sampling_frequency_ = sampling_frequency;
}

/// Sets the maximum bitrate of this audio stream.
/// @param max_bitrate is the new value; it overwrites the one currently
///        stored in max_bitrate_.
void set_max_bitrate(const uint32_t max_bitrate) {
max_bitrate_ = max_bitrate;
}

/// @param audio_object_type is only used by AAC Codec, ignored otherwise.
/// @return The codec string.
static std::string GetCodecString(Codec codec, uint8_t audio_object_type);
Expand Down
1 change: 1 addition & 0 deletions packager/media/formats/mp2t/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_library(mp2t STATIC
pes_packet_generator.h
program_map_table_writer.cc
program_map_table_writer.h
ts_audio_type.h
ts_muxer.cc
ts_muxer.h
ts_packet.cc
Expand Down
27 changes: 25 additions & 2 deletions packager/media/formats/mp2t/mp2t_media_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <packager/media/formats/mp2t/es_parser_h264.h>
#include <packager/media/formats/mp2t/es_parser_h265.h>
#include <packager/media/formats/mp2t/mp2t_common.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_packet.h>
#include <packager/media/formats/mp2t/ts_section.h>
#include <packager/media/formats/mp2t/ts_section_pat.h>
Expand Down Expand Up @@ -274,7 +275,8 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) {
DVLOG(1) << "Create a new PMT parser";
std::unique_ptr<TsSection> pmt_section_parser(new TsSectionPmt(std::bind(
&Mp2tMediaParser::RegisterPes, this, pmt_pid, std::placeholders::_1,
std::placeholders::_2, std::placeholders::_3, std::placeholders::_4)));
std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
std::placeholders::_5, std::placeholders::_6, std::placeholders::_7)));
std::unique_ptr<PidState> pmt_pid_state(
new PidState(pmt_pid, PidState::kPidPmt, std::move(pmt_section_parser)));
pmt_pid_state->Enable();
Expand All @@ -284,13 +286,19 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) {
void Mp2tMediaParser::RegisterPes(int pmt_pid,
int pes_pid,
TsStreamType stream_type,
uint32_t max_bitrate,
const std::string& lang,
TsAudioType audio_type,
const uint8_t* descriptor,
size_t descriptor_length) {
if (pids_.count(pes_pid) != 0)
return;
DVLOG(1) << "RegisterPes:"
<< " pes_pid=" << pes_pid << " stream_type=" << std::hex
<< static_cast<int>(stream_type) << std::dec;
<< static_cast<int>(stream_type) << std::dec
<< "max_bitrate=" << max_bitrate << " lang=" << lang
<< "audio_type=" << std::hex << static_cast<int>(audio_type)
<< std::dec;

// Create a stream parser corresponding to the stream type.
PidState::PidType pid_type = PidState::kPidVideoPes;
Expand Down Expand Up @@ -340,6 +348,10 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
new PidState(pes_pid, pid_type, std::move(pes_section_parser)));
pes_pid_state->Enable();
pids_.emplace(pes_pid, std::move(pes_pid_state));

// Store PES metadata.
pes_metadata_.insert(
std::make_pair(pes_pid, PesMetadata{max_bitrate, lang, audio_type}));
}

void Mp2tMediaParser::OnNewStreamInfo(
Expand All @@ -358,6 +370,17 @@ void Mp2tMediaParser::OnNewStreamInfo(

if (new_stream_info) {
// Set the stream configuration information for the PID.
auto pes_metadata = pes_metadata_.find(pes_pid);
DCHECK(pes_metadata != pes_metadata_.end());
if (!pes_metadata->second.language.empty())
new_stream_info->set_language(pes_metadata->second.language);
if (new_stream_info->stream_type() == kStreamAudio) {
auto* audio_info = static_cast<AudioStreamInfo*>(new_stream_info.get());
audio_info->set_max_bitrate(pes_metadata->second.max_bitrate);
// TODO(modernletter) Add some field for audio type to AudioStreamInfo
// and set here from audio_type
}

pid_state->second->set_config(new_stream_info);
} else {
LOG(WARNING) << "Ignoring unsupported stream with pid=" << pes_pid;
Expand Down
16 changes: 16 additions & 0 deletions packager/media/formats/mp2t/mp2t_media_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
#include <deque>
#include <map>
#include <memory>
#include <string>

#include <packager/macros/classes.h>
#include <packager/media/base/byte_queue.h>
#include <packager/media/base/media_parser.h>
#include <packager/media/base/stream_info.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_stream_type.h>

namespace shaka {
Expand All @@ -27,6 +29,12 @@ class PidState;
class TsPacket;
class TsSection;

// Per-PID metadata that Mp2tMediaParser::RegisterPes records for each
// registered PES pid and that is applied later, when the stream info for
// that pid becomes available.
// NOTE(review): the members have no default initializers, so every
// construction site has to provide all three values.
struct PesMetadata {
// Maximum bitrate signalled for the stream. Presumably in bits per second
// (the PMT parser scales the raw descriptor value) - TODO confirm.
uint32_t max_bitrate;
// Language signalled for the stream. An empty string is not applied to the
// stream info.
std::string language;
// Audio type signalled for the stream. Stored only; it is not yet
// propagated to the stream info.
TsAudioType audio_type;
};

class Mp2tMediaParser : public MediaParser {
public:
Mp2tMediaParser();
Expand All @@ -50,10 +58,15 @@ class Mp2tMediaParser : public MediaParser {
// Callback invoked to register a PES pid.
// Possible values for |media_type| are defined in:
// ISO-13818.1 / ITU H.222 Table 2-34 "Stream type assignments".
// Possible values for |audio_type| are defined in:
// ISO-13818.1 / ITU H.222 Table 2-60 "Audio type values".
// |pes_pid| is part of the Program Map Table referred to by |pmt_pid|.
void RegisterPes(int pmt_pid,
int pes_pid,
TsStreamType media_type,
uint32_t max_bitrate,
const std::string& lang,
TsAudioType audio_type,
const uint8_t* descriptor,
size_t descriptor_length);

Expand Down Expand Up @@ -94,6 +107,9 @@ class Mp2tMediaParser : public MediaParser {
// has a deterministic order.
std::map<int, std::unique_ptr<PidState>> pids_;

// Map of PIDs and their metadata.
std::map<int, PesMetadata> pes_metadata_;

// Whether |init_cb_| has been invoked.
bool is_initialized_;

Expand Down
14 changes: 14 additions & 0 deletions packager/media/formats/mp2t/mp2t_media_parser_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <gtest/gtest.h>

#include <packager/macros/logging.h>
#include <packager/media/base/audio_stream_info.h>
#include <packager/media/base/media_sample.h>
#include <packager/media/base/stream_info.h>
#include <packager/media/base/timestamp.h>
Expand Down Expand Up @@ -190,6 +191,19 @@ TEST_F(Mp2tMediaParserTest, PtsZeroDtsWrapAround) {
EXPECT_GT(video_max_pts_, static_cast<int64_t>(1) << 33);
}

// Verifies that the language and the maximum bitrate signalled in the PMT
// elementary stream descriptors end up in the parsed stream info.
TEST_F(Mp2tMediaParserTest, PmtEsDescriptors) {
  // "bear-visualy-impaired-eng-audio.ts" consists of an audio stream marked
  // as English audio with commentary for visually impaired viewers and with
  // the maximum bitrate set to ~128kbps.
  ParseMpeg2TsFile("bear-visualy-impaired-eng-audio.ts", 188);
  EXPECT_TRUE(parser_->Flush());

  // Abort the test instead of dereferencing an empty entry if the stream
  // with pid 257 was never reported.
  ASSERT_TRUE(stream_map_[257] != nullptr);
  EXPECT_STREQ("eng", stream_map_[257]->language().c_str());

  // The downcast below is only valid for an audio stream, so check first.
  ASSERT_EQ(kStreamAudio, stream_map_[257]->stream_type());
  auto* audio_info = static_cast<AudioStreamInfo*>(stream_map_[257].get());
  // Unsigned literal: avoids a signed/unsigned comparison against the
  // bitrate value.
  EXPECT_EQ(131600u, audio_info->max_bitrate());
}

} // namespace mp2t
} // namespace media
} // namespace shaka
30 changes: 30 additions & 0 deletions packager/media/formats/mp2t/ts_audio_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

#ifndef PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H
#define PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H

#include <stdint.h>

namespace shaka {
namespace media {
namespace mp2t {

// Audio type of an MPEG-TS elementary stream, stored as a single byte.
// Only the values named by the table referenced below are listed; the
// user-private and reserved ranges have no enumerators.
// NOTE(review): "Visualy" in kVisualyImpairedCommentary is a misspelling of
// "Visually"; renaming it requires updating every user of the enumerator.
enum class TsAudioType : uint8_t {
// ISO-13818.1 / ITU H.222 Table 2-60 "Audio type values"
kUndefined = 0x00,
kCleanEffects = 0x01,
kHearingImpaired = 0x02,
kVisualyImpairedCommentary = 0x03,
// 0x04-0x7F - user private
// 0x80-0xFF - reserved
};

} // namespace mp2t
} // namespace media
} // namespace shaka

#endif // PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H
65 changes: 57 additions & 8 deletions packager/media/formats/mp2t/ts_section_pmt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,21 @@

#include <packager/media/base/bit_reader.h>
#include <packager/media/formats/mp2t/mp2t_common.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_stream_type.h>

namespace shaka {
namespace media {
namespace mp2t {

namespace {

// Descriptor tags that the PMT parser looks for in the ES info descriptors
// of an elementary stream.

// ISO 639 language descriptor.
constexpr int kISO639LanguageDescriptor = 0x0A;
// Maximum bitrate descriptor.
constexpr int kMaximumBitrateDescriptor = 0x0E;
// DVB subtitling descriptor.
constexpr int kSubtitlingDescriptor = 0x59;

}  // namespace

// Stores a copy of |register_pes_cb| in register_pes_cb_. The stored
// callback is what ParsePsiSection invokes for the elementary streams it
// collected from a PMT.
TsSectionPmt::TsSectionPmt(const RegisterPesCb& register_pes_cb)
: register_pes_cb_(register_pes_cb) {
}
Expand Down Expand Up @@ -82,6 +91,9 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
TsStreamType stream_type;
const uint8_t* descriptor;
size_t descriptor_length;
std::string lang;
uint32_t max_bitrate;
TsAudioType audio_type;
};
std::vector<Info> pid_info;
while (static_cast<int>(bit_reader->bits_available()) >
Expand All @@ -99,22 +111,59 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
// Do not register the PID right away.
// Wait for the end of the section to be fully parsed
// to make sure there is no error.
pid_info.push_back({pid_es, stream_type, descriptor, es_info_length});
pid_info.push_back({pid_es, stream_type, descriptor, es_info_length, "", 0,
TsAudioType::kUndefined});

// Read the ES info descriptors.
// Defined in section 2.6 of ISO-13818.
if (es_info_length > 0) {
uint8_t descriptor_tag;
uint8_t descriptor_tag;
uint8_t descriptor_length;

while (es_info_length) {
RCHECK(bit_reader->ReadBits(8, &descriptor_tag));
es_info_length--;
RCHECK(bit_reader->ReadBits(8, &descriptor_length));
es_info_length -= 2;

// See ETSI EN 300 468 Section 6.1
if (stream_type == TsStreamType::kPesPrivateData &&
descriptor_tag == 0x59) { // subtitling_descriptor
descriptor_tag == kSubtitlingDescriptor) {
pid_info.back().stream_type = TsStreamType::kDvbSubtitles;
} else if (descriptor_tag == kISO639LanguageDescriptor &&
descriptor_length >= 4) {
// See section 2.6.19 of ISO-13818
// Descriptor can contain 0..N language definitions,
// we process only the first one
RCHECK(es_info_length >= 4);

char lang[3];
RCHECK(bit_reader->ReadBits(8, &lang[0])); // ISO_639_language_code
RCHECK(bit_reader->ReadBits(8, &lang[1]));
RCHECK(bit_reader->ReadBits(8, &lang[2]));
RCHECK(bit_reader->ReadBits(8, &pid_info.back().audio_type));
pid_info.back().lang = std::string(lang, 3);

es_info_length -= 4;
descriptor_length -= 4;
} else if (descriptor_tag == kMaximumBitrateDescriptor &&
descriptor_length >= 3) {
// See section 2.6.25 of ISO-13818
RCHECK(es_info_length >= 3);

uint32_t max_bitrate;
RCHECK(bit_reader->SkipBits(2)); // reserved
RCHECK(bit_reader->ReadBits(22, &max_bitrate));
// maximum bitrate is stored in units of 50 bytes per second
pid_info.back().max_bitrate = 50 * 8 * max_bitrate;

es_info_length -= 3;
descriptor_length -= 3;
}

RCHECK(bit_reader->SkipBits(8 * descriptor_length));
es_info_length -= descriptor_length;
}
RCHECK(bit_reader->SkipBits(8 * es_info_length));

RCHECK(bit_reader->SkipBytes(es_info_length));
}

// Read the CRC.
Expand All @@ -123,8 +172,8 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {

// Once the PMT has been proved to be correct, register the PIDs.
for (auto& info : pid_info) {
register_pes_cb_(info.pid_es, info.stream_type, info.descriptor,
info.descriptor_length);
register_pes_cb_(info.pid_es, info.stream_type, info.max_bitrate, info.lang,
info.audio_type, info.descriptor, info.descriptor_length);
}

return true;
Expand Down
16 changes: 14 additions & 2 deletions packager/media/formats/mp2t/ts_section_pmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
#define PACKAGER_MEDIA_FORMATS_MP2T_TS_SECTION_PMT_H_

#include <functional>
#include <string>

#include <packager/macros/classes.h>
#include <packager/media/formats/mp2t/ts_audio_type.h>
#include <packager/media/formats/mp2t/ts_section_psi.h>
#include <packager/media/formats/mp2t/ts_stream_type.h>

Expand All @@ -17,10 +19,20 @@ namespace mp2t {

class TsSectionPmt : public TsSectionPsi {
public:
// RegisterPesCb::Run(int pes_pid, int stream_type);
// RegisterPesCb::Run(int pes_pid, TsStreamType stream_type,
// uint32_t max_bitrate, const string& lang, TsAudioType audio_type,
// const uint8_t* descriptor, size_t descriptor_size);
// Stream type is defined in
// "Table 2-34 – Stream type assignments" in H.222
typedef std::function<void(int, TsStreamType, const uint8_t*, size_t)>
// Audio type is defined in
// "Table 2-60 - Audio type values" in H.222
typedef std::function<void(int,
TsStreamType,
uint32_t,
const std::string&,
TsAudioType,
const uint8_t*,
size_t)>
RegisterPesCb;

explicit TsSectionPmt(const RegisterPesCb& register_pes_cb);
Expand Down
6 changes: 6 additions & 0 deletions packager/media/test/data/README
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ bear-640x360.ts - AVC + AAC encode, multiplexed into an MPEG2-TS container.
bear-640x360_ptswraparound.ts - Same as bear-640x360.ts, with a timestamp wrap-around in the middle, created with the below command:
ffmpeg -itsoffset 95442 -i bear-640x360.ts -c:v copy -c:a copy -muxdelay 0 bear-640x360_ptswraparound.ts
bear-640x360-hevc.ts - HEVC + AAC encode, multiplexed into an MPEG2-TS container.
bear-visualy-impaired-eng-audio.ts - Audio stream from bear-640x360.ts marked as English with commentary for visually impaired viewers, created with the below commands:
tsp -I file bear-640x360.ts \
-P filter --video --negate \
-P inject --replace --pid 4096 --xml bear-visualy-impaired-eng-audio-pmt.xml \
-O file bear-visualy-impaired-eng-audio.ts
(The XML template can be obtained with the command "tsp -I file bear-640x360.ts -P tables --pid 4096 --tid 2 --max 1 --xml pmt.xml -O drop".)

// ISO-BMFF streams.
bear-1280x720.mp4 - AVC + AAC encode, mulitplexed into an ISOBMFF container.
Expand Down
12 changes: 12 additions & 0 deletions packager/media/test/data/bear-visualy-impaired-eng-audio-pmt.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<tsduck>
<PMT version="0" current="true" service_id="0x0001" PCR_PID="0x0100">
<metadata PID="4,096"/>
<component elementary_PID="0x0101" stream_type="0x0F">
<ISO_639_language_descriptor>
<language code="eng" audio_type="0x03"/>
</ISO_639_language_descriptor>
<maximum_bitrate_descriptor maximum_bitrate="131,600"/>
</component>
</PMT>
</tsduck>
Binary file not shown.
Loading