diff --git a/trunk/conf/full.conf b/trunk/conf/full.conf index 0579788fac..361c5d7229 100644 --- a/trunk/conf/full.conf +++ b/trunk/conf/full.conf @@ -1787,6 +1787,13 @@ vhost hls.srs.com { # default: off enabled on; + # whether to use fmp4 as the container format. + # When off (the default), HLS uses ts as the container format; + # when on, HLS uses fmp4 as the container format. + # Overwrite by env SRS_VHOST_HLS_HLS_USE_FMP4 for all vhosts. + # default: off + hls_use_fmp4 on; + # the hls fragment in seconds, the duration of a piece of ts. # Overwrite by env SRS_VHOST_HLS_HLS_FRAGMENT for all vhosts. # default: 10 @@ -1852,6 +1859,26 @@ vhost hls.srs.com { # Overwrite by env SRS_VHOST_HLS_HLS_TS_FILE for all vhosts. # default: [app]/[stream]-[seq].ts hls_ts_file [app]/[stream]-[seq].ts; + # the hls fmp4 file name. + # we support some variables to generate the filename. + # [vhost], the vhost of stream. + # [app], the app of stream. + # [stream], the stream name of stream. + # [2006], replace this const to current year. + # [01], replace this const to current month. + # [02], replace this const to current date. + # [15], replace this const to current hour. + # [04], replace this const to current minute. + # [05], replace this const to current second. + # [999], replace this const to current millisecond. + # [timestamp], replace this const to current UNIX timestamp in ms. + # [seq], the sequence number of the fmp4 segment. + # [duration], replace this const to current segment duration. + # @see https://ossrs.net/lts/zh-cn/docs/v4/doc/dvr#custom-path + # @see https://ossrs.net/lts/zh-cn/docs/v4/doc/delivery-hls#hls-config + # Overwrite by env SRS_VHOST_HLS_HLS_FMP4_FILE for all vhosts. + # default: [app]/[stream]-[seq].m4s + hls_fmp4_file [app]/[stream]-[seq].m4s; # the hls entry prefix, which is base url of ts url. 
# for example, the prefix is: # http://your-server/ diff --git a/trunk/conf/hls.mp4.conf b/trunk/conf/hls.mp4.conf new file mode 100644 index 0000000000..78b8fa3dde --- /dev/null +++ b/trunk/conf/hls.mp4.conf @@ -0,0 +1,26 @@ +# the config for srs to delivery hls +# @see https://ossrs.net/lts/zh-cn/docs/v4/doc/sample-hls +# @see full.conf for detail config. + +listen 1935; +max_connections 1000; +daemon off; +srs_log_tank console; +http_server { + enabled on; + listen 8080; + dir ./objs/nginx/html; +} +http_api { + enabled on; + listen 1985; +} +vhost __defaultVhost__ { + hls { + enabled on; + hls_use_fmp4 on; + hls_path ./objs/nginx/html; + hls_fragment 2; + hls_window 10; + } +} diff --git a/trunk/src/app/srs_app_config.cpp b/trunk/src/app/srs_app_config.cpp index 0731f3cd2b..c3da69efcb 100644 --- a/trunk/src/app/srs_app_config.cpp +++ b/trunk/src/app/srs_app_config.cpp @@ -2683,7 +2683,7 @@ srs_error_t SrsConfig::check_normal_config() && m != "hls_storage" && m != "hls_mount" && m != "hls_td_ratio" && m != "hls_aof_ratio" && m != "hls_acodec" && m != "hls_vcodec" && m != "hls_m3u8_file" && m != "hls_ts_file" && m != "hls_ts_floor" && m != "hls_cleanup" && m != "hls_nb_notify" && m != "hls_wait_keyframe" && m != "hls_dispose" && m != "hls_keys" && m != "hls_fragments_per_key" && m != "hls_key_file" - && m != "hls_key_file_path" && m != "hls_key_url" && m != "hls_dts_directly" && m != "hls_ctx" && m != "hls_ts_ctx") { + && m != "hls_key_file_path" && m != "hls_key_url" && m != "hls_dts_directly" && m != "hls_ctx" && m != "hls_ts_ctx" && m != "hls_use_fmp4" && m != "hls_fmp4_file") { return srs_error_new(ERROR_SYSTEM_CONFIG_INVALID, "illegal vhost.hls.%s of %s", m.c_str(), vhost->arg0().c_str()); } @@ -6936,6 +6936,31 @@ bool SrsConfig::get_hls_enabled(SrsConfDirective* vhost) return SRS_CONF_PREFER_FALSE(conf->arg0()); } +bool SrsConfig::get_hls_use_fmp4(std::string vhost) +{ + SRS_OVERWRITE_BY_ENV_BOOL("srs.vhost.hls.hls_use_fmp4"); // SRS_VHOST_HLS_HLS_USE_FMP4 
+ + static bool DEFAULT = false; + + SrsConfDirective* conf = get_vhost(vhost); + if (!conf) { + return DEFAULT; + } + + conf = conf->get("hls"); + + if (!conf) { + return DEFAULT; + } + + conf = conf->get("hls_use_fmp4"); + if (!conf || conf->arg0().empty()) { + return DEFAULT; + } + + return SRS_CONF_PREFER_FALSE(conf->arg0()); +} + string SrsConfig::get_hls_entry_prefix(string vhost) { SRS_OVERWRITE_BY_ENV_STRING("srs.vhost.hls.hls_entry_prefix"); // SRS_VHOST_HLS_HLS_ENTRY_PREFIX @@ -7012,6 +7037,25 @@ string SrsConfig::get_hls_ts_file(string vhost) return conf->arg0(); } +string SrsConfig::get_hls_fmp4_file(std::string vhost) +{ + SRS_OVERWRITE_BY_ENV_STRING("srs.vhost.hls.hls_fmp4_file"); // SRS_VHOST_HLS_HLS_FMP4_FILE + + static string DEFAULT = "[app]/[stream]-[seq].m4s"; + + SrsConfDirective* conf = get_hls(vhost); + if (!conf) { + return DEFAULT; + } + + conf = conf->get("hls_fmp4_file"); + if (!conf || conf->arg0().empty()) { + return DEFAULT; + } + + return conf->arg0(); +} + bool SrsConfig::get_hls_ts_floor(string vhost) { SRS_OVERWRITE_BY_ENV_BOOL("srs.vhost.hls.hls_ts_floor"); // SRS_VHOST_HLS_HLS_TS_FLOOR diff --git a/trunk/src/app/srs_app_config.hpp b/trunk/src/app/srs_app_config.hpp index 28aec179db..4c337ffca9 100644 --- a/trunk/src/app/srs_app_config.hpp +++ b/trunk/src/app/srs_app_config.hpp @@ -933,6 +933,8 @@ class SrsConfig // Whether HLS is enabled. virtual bool get_hls_enabled(std::string vhost); virtual bool get_hls_enabled(SrsConfDirective* vhost); + // Whether HLS use fmp4 container format + virtual bool get_hls_use_fmp4(std::string vhost); // Get the HLS m3u8 list ts segment entry prefix info. virtual std::string get_hls_entry_prefix(std::string vhost); // Get the HLS ts/m3u8 file store path. @@ -941,6 +943,8 @@ class SrsConfig virtual std::string get_hls_m3u8_file(std::string vhost); // Get the HLS ts file path template. virtual std::string get_hls_ts_file(std::string vhost); + // Get the HLS fmp4 file path template. 
+ virtual std::string get_hls_fmp4_file(std::string vhost); // Whether enable the floor(timestamp/hls_fragment) for variable timestamp. virtual bool get_hls_ts_floor(std::string vhost); // Get the hls fragment time, in srs_utime_t. @@ -985,6 +989,7 @@ class SrsConfig // Whether enable hls_ctx virtual bool get_hls_ctx_enabled(std::string vhost); // Whether enable session for ts file. + // The ts file including .ts file for MPEG-ts segment, .m4s file and init.mp4 file for fmp4 segment. virtual bool get_hls_ts_ctx_enabled(std::string vhost); // hds section private: diff --git a/trunk/src/app/srs_app_hls.cpp b/trunk/src/app/srs_app_hls.cpp index 9bc9bdfcb6..43b518a715 100644 --- a/trunk/src/app/srs_app_hls.cpp +++ b/trunk/src/app/srs_app_hls.cpp @@ -31,6 +31,7 @@ using namespace std; #include #include #include +#include #include #include #include @@ -76,6 +77,189 @@ srs_error_t SrsHlsSegment::rename() return SrsFragment::rename(); } +SrsInitMp4Segment::SrsInitMp4Segment() +{ + fw_ = new SrsFileWriter(); + init_ = new SrsMp4M2tsInitEncoder(); + const_iv_size_ = 0; +} + +SrsInitMp4Segment::~SrsInitMp4Segment() +{ + srs_freep(init_); + srs_freep(fw_); +} + +srs_error_t SrsInitMp4Segment::config_cipher(unsigned char* kid, unsigned char* const_iv, uint8_t const_iv_size) +{ + srs_error_t err = srs_success; + if (const_iv_size != 8 && const_iv_size != 16) { + return srs_error_new(ERROR_MP4_BOX_STRING, "invalidate const_iv_size"); + } + memcpy(kid_, kid, 16); + memcpy(const_iv_, const_iv, const_iv_size); + const_iv_size_ = const_iv_size; + init_->config_encryption(1, 9, kid_, const_iv, const_iv_size); + + return err; +} + +srs_error_t SrsInitMp4Segment::write(SrsFormat* format, int v_tid, int a_tid) +{ + srs_error_t err = srs_success; + + if ((err = init_encoder()) != srs_success) { + return srs_error_wrap(err, "init encoder"); + } + + if ((err = init_->write(format, v_tid, a_tid)) != srs_success) { + return srs_error_wrap(err, "write init"); + } + + return err; +} + 
+srs_error_t SrsInitMp4Segment::write_video_only(SrsFormat* format, int v_tid) +{ + srs_error_t err = srs_success; + + if ((err = init_encoder()) != srs_success) { + return srs_error_wrap(err, "init encoder"); + } + + if ((err = init_->write(format, true, v_tid)) != srs_success) { + return srs_error_wrap(err, "write init"); + } + + return err; +} + +srs_error_t SrsInitMp4Segment::write_audio_only(SrsFormat* format, int a_tid) +{ + srs_error_t err = srs_success; + + if ((err = init_encoder()) != srs_success) { + return srs_error_wrap(err, "init encoder"); + } + + if ((err = init_->write(format, false, a_tid)) != srs_success) { + return srs_error_wrap(err, "write init"); + } + + return err; +} + +srs_error_t SrsInitMp4Segment::init_encoder() +{ + srs_error_t err = srs_success; + + srs_assert(!fullpath().empty()); + + string path_tmp = tmppath(); + if ((err = fw_->open(path_tmp)) != srs_success) { + return srs_error_wrap(err, "Open init mp4 failed, path=%s", path_tmp.c_str()); + } + + if ((err = init_->initialize(fw_)) != srs_success) { + return srs_error_wrap(err, "init"); + } + + return err; +} + + +SrsHlsM4sSegment::SrsHlsM4sSegment(SrsFileWriter* fw) +{ + fw_ = fw; + enc_ = new SrsFmp4SegmentEncoder(); +} + +SrsHlsM4sSegment::~SrsHlsM4sSegment() +{ + srs_freep(enc_); +} + +srs_error_t SrsHlsM4sSegment::initialize(int64_t time, uint32_t v_tid, uint32_t a_tid, int sequence_number, std::string m4s_path) +{ + srs_error_t err = srs_success; + + set_path(m4s_path); + + set_number(sequence_number); + if ((err = create_dir()) != srs_success) { + return srs_error_wrap(err, "create dir"); + } + + if ((err = fw_->open(tmppath())) != srs_success) { + return srs_error_wrap(err, "fw open"); + } + + if ((err = enc_->initialize(fw_, sequence_number, time, v_tid, a_tid)) != srs_success) + { + return srs_error_wrap(err, "initialize SrsFmp4SegmentEncoder"); + } + + return err; +} + +void SrsHlsM4sSegment::config_cipher(unsigned char* key, unsigned char* iv) +{ + // TODO: set key and 
iv to mp4 box + enc_->config_cipher(key, iv); + memcpy(this->iv, iv,16); +} + +srs_error_t SrsHlsM4sSegment::write(SrsSharedPtrMessage* shared_msg, SrsFormat* format) +{ + srs_error_t err = srs_success; + + if (shared_msg->is_audio()) { + uint8_t* sample = (uint8_t*)format->raw; + uint32_t nb_sample = (uint32_t)format->nb_raw; + + uint32_t dts = (uint32_t)shared_msg->timestamp; + if ((err = enc_->write_sample(SrsMp4HandlerTypeSOUN, 0x00, dts, dts, sample, nb_sample)) != srs_success) { + return srs_error_wrap(err, "m4s segment write audio sample"); + } + } else if (shared_msg->is_video()) { + SrsVideoAvcFrameType frame_type = format->video->frame_type; + uint32_t cts = (uint32_t)format->video->cts; + + uint32_t dts = (uint32_t)shared_msg->timestamp; + uint32_t pts = dts + cts; + + uint8_t* sample = (uint8_t*)format->raw; + uint32_t nb_sample = (uint32_t)format->nb_raw; + if ((err = enc_->write_sample(SrsMp4HandlerTypeVIDE, frame_type, dts, pts, sample, nb_sample)) != srs_success) { + return srs_error_wrap(err, "m4s segment write video sample"); + } + } else { + return err; + } + + append(shared_msg->timestamp); + + return err; +} + +srs_error_t SrsHlsM4sSegment::reap(uint64_t& dts) +{ + srs_error_t err = srs_success; + + if ((err = enc_->flush(dts)) != srs_success) { + return srs_error_wrap(err, "Flush encoder failed"); + } + + // srs_freep(fw_); + fw_->close(); + + if ((err = rename()) != srs_success) { + return srs_error_wrap(err, "rename"); + } + + return err; +} + SrsDvrAsyncCallOnHls::SrsDvrAsyncCallOnHls(SrsContextId c, SrsRequest* r, string p, string t, string m, string mu, int s, srs_utime_t d) { req = r->copy(); @@ -151,37 +335,689 @@ srs_error_t SrsDvrAsyncCallOnHlsNotify::call() return err; } - // the http hooks will cause context switch, - // so we must copy all hooks for the on_connect may freed. 
- // @see https://github.com/ossrs/srs/issues/475 - vector hooks; + // the http hooks will cause context switch, + // so we must copy all hooks for the on_connect may freed. + // @see https://github.com/ossrs/srs/issues/475 + vector hooks; + + if (true) { + SrsConfDirective* conf = _srs_config->get_vhost_on_hls_notify(req->vhost); + + if (!conf) { + return err; + } + + hooks = conf->args; + } + + int nb_notify = _srs_config->get_vhost_hls_nb_notify(req->vhost); + for (int i = 0; i < (int)hooks.size(); i++) { + std::string url = hooks.at(i); + if ((err = SrsHttpHooks::on_hls_notify(cid, url, req, ts_url, nb_notify)) != srs_success) { + return srs_error_wrap(err, "callback on_hls_notify %s", url.c_str()); + } + } + + return err; +} + +string SrsDvrAsyncCallOnHlsNotify::to_string() +{ + return "on_hls_notify: " + ts_url; +} + +SrsHlsFmp4Muxer::SrsHlsFmp4Muxer() +{ + req_ = NULL; + hls_fragment_ = hls_window_ = 0; + hls_aof_ratio_ = 1.0; + deviation_ts_ = 0; + hls_cleanup_ = true; + hls_wait_keyframe_ = true; + previous_floor_ts_ = 0; + accept_floor_ts_ = 0; + hls_ts_floor_ = false; + max_td_ = 0; + writer_ = NULL; + sequence_no_ = 0; + current_ = NULL; + hls_keys_ = false; + hls_fragments_per_key_ = 0; + async_ = new SrsAsyncCallWorker(); + segments_ = new SrsFragmentWindow(); + latest_acodec_ = SrsAudioCodecIdForbidden; + latest_vcodec_ = SrsVideoCodecIdForbidden; + video_track_id_ = 0; + audio_track_id_ = 0; + init_mp4_ready_ = false; + video_dts_ = 0; + + memset(key_, 0, 16); + memset(iv_, 0, 16); +} + +SrsHlsFmp4Muxer::~SrsHlsFmp4Muxer() +{ + srs_freep(segments_); + srs_freep(current_); + srs_freep(req_); + srs_freep(async_); + srs_freep(writer_); +} + +void SrsHlsFmp4Muxer::dispose() +{ + srs_error_t err = srs_success; + + segments_->dispose(); + + if (current_) { + if ((err = current_->unlink_tmpfile()) != srs_success) { + srs_warn("Unlink tmp ts failed %s", srs_error_desc(err).c_str()); + srs_freep(err); + } + srs_freep(current_); + } + + if 
(unlink(m3u8_.c_str()) < 0) { + srs_warn("dispose unlink path failed. file=%s", m3u8_.c_str()); + } + + srs_trace("gracefully dispose hls %s", req_ ? req_->get_stream_url().c_str() : ""); +} + +int SrsHlsFmp4Muxer::sequence_no() +{ + return sequence_no_; +} + +std::string SrsHlsFmp4Muxer::ts_url() +{ + // return current_ ? current_->uri : ""; + // TODO: impl segment url for fmp4 segment. + return ""; +} + +srs_utime_t SrsHlsFmp4Muxer::duration() +{ + return current_ ? current_->duration() : 0; +} + +int SrsHlsFmp4Muxer::deviation() +{ + // no floor, no deviation. + if (!hls_ts_floor_) { + return 0; + } + + return deviation_ts_; +} + +SrsAudioCodecId SrsHlsFmp4Muxer::latest_acodec() +{ + return latest_acodec_; +} + +void SrsHlsFmp4Muxer::set_latest_acodec(SrsAudioCodecId v) +{ + latest_acodec_ = v; +} + +SrsVideoCodecId SrsHlsFmp4Muxer::latest_vcodec() +{ + return latest_vcodec_; +} + +void SrsHlsFmp4Muxer::set_latest_vcodec(SrsVideoCodecId v) +{ + latest_vcodec_ = v; +} + +srs_error_t SrsHlsFmp4Muxer::initialize(int v_tid, int a_tid) +{ + srs_error_t err = srs_success; + + video_track_id_ = v_tid; + audio_track_id_ = a_tid; + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::on_publish(SrsRequest* req) +{ + srs_error_t err = srs_success; + + if ((err = async_->start()) != srs_success) { + return srs_error_wrap(err, "async start"); + } + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::write_init_mp4(SrsFormat* format, bool has_video, bool has_audio) +{ + srs_error_t err = srs_success; + + std::string vhost = req_->vhost; + std::string stream = req_->stream; + std::string app = req_->app; + std::string path = _srs_config->get_hls_path(vhost); + + path = path + "/" + app + "/" + stream; + if ((err = srs_create_dir_recursively(path)) != srs_success) { + return srs_error_wrap(err, "Create media home failed, home=%s", path.c_str()); + } + + path += "/init.mp4"; + + SrsUniquePtr init_mp4(new SrsInitMp4Segment()); + + init_mp4->set_path(path); + + if (hls_keys_) { + 
init_mp4->config_cipher(kid_, iv_, 16); + } + + if (has_video && has_audio) { + if ((err = init_mp4->write(format, video_track_id_, audio_track_id_)) != srs_success) { + return srs_error_wrap(err, "write hls init.mp4 with audio and video"); + } + } else if (has_video) { + if ((err = init_mp4->write_video_only(format, video_track_id_)) != srs_success) { + return srs_error_wrap(err, "write hls init.mp4 with video only"); + } + } else if (has_audio) { + if ((err = init_mp4->write_audio_only(format, audio_track_id_)) != srs_success) { + return srs_error_wrap(err, "write hls init.mp4 with audio only"); + } + } else { + return srs_error_new(ERROR_HLS_WRITE_FAILED, "no video and no audio sequence header"); + } + + if ((err = init_mp4->rename()) != srs_success) { + return srs_error_wrap(err, "rename hls init.mp4"); + } + + init_mp4_ready_ = true; + return err; +} + +// Write one audio message into the current fmp4 segment. +// Opens a segment when none is open, and closes/reopens it once the current +// segment duration reaches hls_fragment_. Returns the first error met. +srs_error_t SrsHlsFmp4Muxer::write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format) +{ + srs_error_t err = srs_success; + + // audio_dts_ = shared_audio->timestamp; + + if (!current_) { + if ((err = segment_open(shared_audio->timestamp * SRS_UTIME_MILLISECONDS)) != srs_success) { + return srs_error_wrap(err, "open segment"); + } + } + + if (current_->duration() >= hls_fragment_) { + if ((err = segment_close()) != srs_success) { + return srs_error_wrap(err, "segment close"); + } + + if ((err = segment_open(shared_audio->timestamp * SRS_UTIME_MILLISECONDS)) != srs_success) { + return srs_error_wrap(err, "open segment"); + } + } + + // Propagate the sample write failure instead of dropping (and leaking) the returned error. + if ((err = current_->write(shared_audio, format)) != srs_success) { + return srs_error_wrap(err, "write audio"); + } + + return err; +} + +// Write one video message into the current fmp4 segment. +// Records the message timestamp in video_dts_ (later passed to reap), opens a +// segment when none is open, and closes/reopens it once the current segment +// duration reaches hls_fragment_. Returns the first error met. +srs_error_t SrsHlsFmp4Muxer::write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format) +{ + srs_error_t err = srs_success; + + video_dts_ = shared_video->timestamp; + + if (!current_) { + if ((err = segment_open(shared_video->timestamp * SRS_UTIME_MILLISECONDS)) != srs_success) { + return srs_error_wrap(err, "open segment"); + } + } + + // TODO: reap segment only when get key frame? 
+ bool reopen = current_->duration() >= hls_fragment_; + if (reopen) { + if ((err = segment_close()) != srs_success) { + return srs_error_wrap(err, "segment close"); + } + + if ((err = segment_open(shared_video->timestamp * SRS_UTIME_MILLISECONDS)) != srs_success) { + return srs_error_wrap(err, "open segment"); + } + } + + // Propagate the sample write failure instead of dropping (and leaking) the returned error. + if ((err = current_->write(shared_video, format)) != srs_success) { + return srs_error_wrap(err, "write video"); + } + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::on_unpublish() +{ + async_->stop(); + return srs_success; +} + +srs_error_t SrsHlsFmp4Muxer::update_config(SrsRequest* r) +{ + srs_error_t err = srs_success; + + srs_freep(req_); + req_ = r->copy(); + + std::string vhost = req_->vhost; + std::string stream = req_->stream; + std::string app = req_->app; + + hls_fragment_ = _srs_config->get_hls_fragment(vhost); + double hls_td_ratio = _srs_config->get_hls_td_ratio(vhost); + hls_window_ = _srs_config->get_hls_window(vhost); + + // get the hls m3u8 ts list entry prefix config + hls_entry_prefix_ = _srs_config->get_hls_entry_prefix(vhost); + // get the hls path config + hls_path_ = _srs_config->get_hls_path(vhost); + m3u8_url_ = _srs_config->get_hls_m3u8_file(vhost); + hls_m4s_file_ = _srs_config->get_hls_fmp4_file(vhost); + hls_cleanup_ = _srs_config->get_hls_cleanup(vhost); + hls_wait_keyframe_ = _srs_config->get_hls_wait_keyframe(vhost); + // the audio overflow, for pure audio to reap segment. + hls_aof_ratio_ = _srs_config->get_hls_aof_ratio(vhost); + // whether use floor(timestamp/hls_fragment) for variable timestamp + hls_ts_floor_ = _srs_config->get_hls_ts_floor(vhost); + + hls_keys_ = _srs_config->get_hls_keys(vhost); + hls_fragments_per_key_ = _srs_config->get_hls_fragments_per_key(vhost); + hls_key_file_ = _srs_config->get_hls_key_file(vhost); + hls_key_file_path_ = _srs_config->get_hls_key_file_path(vhost); + hls_key_url_ = _srs_config->get_hls_key_url(vhost); + + previous_floor_ts_ = 0; + accept_floor_ts_ = 0; + deviation_ts_ = 0; + + // generate the m3u8 dir and path. 
+ m3u8_url_ = srs_path_build_stream(m3u8_url_, vhost, app, stream); + m3u8_ = hls_path_ + "/" + m3u8_url_; + + // when update config, reset the history target duration. + max_td_ = hls_fragment_ * hls_td_ratio; + + // create m3u8 dir once. + m3u8_dir_ = srs_path_dirname(m3u8_); + if ((err = srs_create_dir_recursively(m3u8_dir_)) != srs_success) { + return srs_error_wrap(err, "create dir"); + } + + if (hls_keys_ && (hls_path_ != hls_key_file_path_)) { + string key_file = srs_path_build_stream(hls_key_file_, vhost, app, stream); + string key_url = hls_key_file_path_ + "/" + key_file; + string key_dir = srs_path_dirname(key_url); + if ((err = srs_create_dir_recursively(key_dir)) != srs_success) { + return srs_error_wrap(err, "create dir"); + } + } + + writer_ = new SrsFileWriter(); + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::segment_open(srs_utime_t basetime) +{ + srs_error_t err = srs_success; + + if (current_) { + srs_warn("ignore the segment open, for segment is already open."); + return err; + } + + // new segment. + current_ = new SrsHlsM4sSegment(writer_); + current_->sequence_no = sequence_no_++; + + if ((err = write_hls_key()) != srs_success) { + return srs_error_wrap(err, "write hls key"); + } + + // generate filename. + std::string m4s_file = hls_m4s_file_; + m4s_file = srs_path_build_stream(m4s_file, req_->vhost, req_->app, req_->stream); + if (hls_ts_floor_) { + // accept the floor ts for the first piece. + int64_t current_floor_ts = srs_update_system_time() / hls_fragment_; + if (!accept_floor_ts_) { + accept_floor_ts_ = current_floor_ts - 1; + } else { + accept_floor_ts_++; + } + + // jump when deviation more than 10p + if (accept_floor_ts_ - current_floor_ts > SRS_JUMP_WHEN_PIECE_DEVIATION) { + srs_warn("hls: jmp for ts deviation, current=%" PRId64 ", accept=%" PRId64, current_floor_ts, accept_floor_ts_); + accept_floor_ts_ = current_floor_ts - 1; + } + + // when reap ts, adjust the deviation. 
+ deviation_ts_ = (int)(accept_floor_ts_ - current_floor_ts); + + // dup/jmp detect for ts in floor mode. + if (previous_floor_ts_ && previous_floor_ts_ != current_floor_ts - 1) { + srs_warn("hls: dup/jmp ts, previous=%" PRId64 ", current=%" PRId64 ", accept=%" PRId64 ", deviation=%d", + previous_floor_ts_, current_floor_ts, accept_floor_ts_, deviation_ts_); + } + previous_floor_ts_ = current_floor_ts; + + // we always ensure the piece is increase one by one. + std::stringstream ts_floor; + ts_floor << accept_floor_ts_; + m4s_file = srs_string_replace(m4s_file, "[timestamp]", ts_floor.str()); + + // TODO: FIMXE: we must use the accept ts floor time to generate the hour variable. + m4s_file = srs_path_build_timestamp(m4s_file); + } else { + m4s_file = srs_path_build_timestamp(m4s_file); + } + if (true) { + std::stringstream ss; + ss << current_->sequence_no; + m4s_file = srs_string_replace(m4s_file, "[seq]", ss.str()); + } + current_->set_path(hls_path_ + "/" + m4s_file); + + std::string m4s_path = hls_path_ + "/" + m4s_file; + + // the ts url, relative or absolute url. + // TODO: FIXME: Use url and path manager. + std::string ts_url = current_->fullpath(); + if (srs_string_starts_with(ts_url, m3u8_dir_)) { + ts_url = ts_url.substr(m3u8_dir_.length()); + } + while (srs_string_starts_with(ts_url, "/")) { + ts_url = ts_url.substr(1); + } + // current->uri += hls_entry_prefix; + if (!hls_entry_prefix_.empty() && !srs_string_ends_with(hls_entry_prefix_, "/")) { + // current_->uri += "/"; + + // add the http dir to uri. + string http_dir = srs_path_dirname(m3u8_url_); + if (!http_dir.empty()) { + // current->uri += http_dir + "/"; + } + } + + // Segment initialization creates the directory, opens the tmp file and sets up the encoder; + // report its failure instead of returning success with an unusable segment. + if ((err = current_->initialize(basetime, video_track_id_, audio_track_id_, sequence_no_, m4s_path)) != srs_success) { + return srs_error_wrap(err, "initialize segment"); + } + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::on_sequence_header() +{ + return srs_success; +} + +bool SrsHlsFmp4Muxer::is_segment_overflow() +{ + srs_assert(current_); + + // to prevent very small segment. 
+ if (current_->duration() < 2 * SRS_HLS_SEGMENT_MIN_DURATION) { + return false; + } + + // Use N% deviation, to smoother. + srs_utime_t deviation = hls_ts_floor_ ? SRS_HLS_FLOOR_REAP_PERCENT * deviation_ts_ * hls_fragment_ : 0; + + // Keep in mind that we use max_td for the base duration, not the hls_fragment. To calculate + // max_td, multiply hls_fragment by hls_td_ratio. + return current_->duration() >= max_td_ + deviation; +} + +bool SrsHlsFmp4Muxer::wait_keyframe() +{ + return hls_wait_keyframe_; +} + +bool SrsHlsFmp4Muxer::is_segment_absolutely_overflow() +{ + srs_assert(current_); + + // to prevent very small segment. + if (current_->duration() < 2 * SRS_HLS_SEGMENT_MIN_DURATION) { + return false; + } + + // use N% deviation, to smoother. + srs_utime_t deviation = hls_ts_floor_? SRS_HLS_FLOOR_REAP_PERCENT * deviation_ts_ * hls_fragment_ : 0; + return current_->duration() >= hls_aof_ratio_ * hls_fragment_ + deviation; +} + +void SrsHlsFmp4Muxer::update_duration(uint64_t dts) +{ + current_->append(dts / 90); +} + +srs_error_t SrsHlsFmp4Muxer::segment_close() +{ + srs_error_t err = do_segment_close(); + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::do_segment_close() +{ + srs_error_t err = srs_success; + + if (!current_) { + srs_warn("ignore the segment close, for segment is not open."); + return err; + } + + if ((err = current_->reap(video_dts_)) != srs_success) { + return srs_error_wrap(err, "reap segment"); + } + + // // use async to call the http hooks, for it will cause thread switch. + // if ((err = async_->execute(new SrsDvrAsyncCallOnHls(_srs_context->get_id(), req_, current_->fullpath(), + // current_->uri, m3u8_, m3u8_url_, current_->sequence_no, current_->duration()))) != srs_success) { + // return srs_error_wrap(err, "segment close"); + // } + + // // use async to call the http hooks, for it will cause thread switch. 
+ // if ((err = async_->execute(new SrsDvrAsyncCallOnHlsNotify(_srs_context->get_id(), req_, current_->uri))) != srs_success) { + // return srs_error_wrap(err, "segment close"); + // } + + segments_->append(current_); + current_ = NULL; + + // shrink the segments. + segments_->shrink(hls_window_); + + // refresh the m3u8, donot contains the removed ts + if ((err = refresh_m3u8()) != srs_success) { + return srs_error_wrap(err, "refresh m3u8"); + } + + // remove the ts file. + segments_->clear_expired(hls_cleanup_); + + return err; +} + +// Rotate and apply the HLS encryption key for the current segment. +// When hls_keys_ is on and the segment sequence number is a multiple of +// hls_fragments_per_key_, generate a fresh random key, kid and iv and write the +// 16-byte key to the key file. Whenever hls_keys_ is on, configure the current +// segment cipher with the active key and iv. +// NOTE(review): the modulo assumes hls_fragments_per_key_ is non-zero - confirm the config layer guarantees it. +srs_error_t SrsHlsFmp4Muxer::write_hls_key() +{ + srs_error_t err = srs_success; + + if (hls_keys_ && current_->sequence_no % hls_fragments_per_key_ == 0) { + // RAND_bytes returns 1 only on success (0 or -1 on failure), and there is no + // underlying error to wrap here, so create a new one. + if (RAND_bytes(key_, 16) != 1) { + return srs_error_new(ERROR_HLS_WRITE_FAILED, "rand key failed."); + } + if (RAND_bytes(kid_, 16) != 1) { + return srs_error_new(ERROR_HLS_WRITE_FAILED, "rand kid failed."); + } + if (RAND_bytes(iv_, 16) != 1) { + return srs_error_new(ERROR_HLS_WRITE_FAILED, "rand iv failed."); + } + + string key_file = srs_path_build_stream(hls_key_file_, req_->vhost, req_->app, req_->stream); + key_file = srs_string_replace(key_file, "[seq]", srs_int2str(current_->sequence_no)); + string key_url = hls_key_file_path_ + "/" + key_file; + + SrsFileWriter fw; + if ((err = fw.open(key_url)) != srs_success) { + return srs_error_wrap(err, "open file %s", key_url.c_str()); + } + + err = fw.write(key_, 16, NULL); + fw.close(); + + if (err != srs_success) { + return srs_error_wrap(err, "write key"); + } + } + + if (hls_keys_) { + current_->config_cipher(key_, iv_); + } + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::refresh_m3u8() +{ + srs_error_t err = srs_success; + + // no segments, also no m3u8, return. + if (segments_->empty()) { + return err; + } + + std::string temp_m3u8 = m3u8_ + ".temp"; + if ((err = _refresh_m3u8(temp_m3u8)) == srs_success) { + if (rename(temp_m3u8.c_str(), m3u8_.c_str()) < 0) { + err = srs_error_new(ERROR_HLS_WRITE_FAILED, "hls: rename m3u8 file failed. 
%s => %s", temp_m3u8.c_str(), m3u8_.c_str()); + } + } + + // remove the temp file. + if (srs_path_exists(temp_m3u8)) { + if (unlink(temp_m3u8.c_str()) < 0) { + srs_warn("ignore remove m3u8 failed, %s", temp_m3u8.c_str()); + } + } + + return err; +} + +srs_error_t SrsHlsFmp4Muxer::_refresh_m3u8(std::string m3u8_file) +{ + srs_error_t err = srs_success; + + // no segments, return. + if (segments_->empty()) { + return err; + } + + SrsFileWriter writer; + if ((err = writer.open(m3u8_file)) != srs_success) { + return srs_error_wrap(err, "hls: open m3u8 file %s", m3u8_file.c_str()); + } + + // #EXTM3U\n + // #EXT-X-VERSION:3\n + std::stringstream ss; + ss << "#EXTM3U" << SRS_CONSTS_LF; + // TODO: for fmp4 set #EXT-X-VERSION:7, need support tag #EXT-X-MAP:URI="init.mp4", which + // at least version:5 + // DOC: https://developer.apple.com/documentation/http-live-streaming/about-the-ext-x-version-tag + ss << "#EXT-X-VERSION:7" << SRS_CONSTS_LF; + + // #EXT-X-MEDIA-SEQUENCE:4294967295\n + SrsHlsM4sSegment* first = dynamic_cast(segments_->first()); + if (first == NULL) { + return srs_error_new(ERROR_HLS_WRITE_FAILED, "segments cast"); + } + + ss << "#EXT-X-MEDIA-SEQUENCE:" << first->sequence_no << SRS_CONSTS_LF; + + // #EXT-X-TARGETDURATION:4294967295\n + /** + * @see hls-m3u8-draft-pantos-http-live-streaming-12.pdf, page 25 + * The Media Playlist file MUST contain an EXT-X-TARGETDURATION tag. + * Its value MUST be equal to or greater than the EXTINF duration of any + * media segment that appears or will appear in the Playlist file, + * rounded to the nearest integer. Its value MUST NOT change. A + * typical target duration is 10 seconds. 
+ */ + srs_utime_t max_duration = segments_->max_duration(); + int target_duration = (int)ceil(srsu2msi(srs_max(max_duration, max_td_)) / 1000.0); + + ss << "#EXT-X-TARGETDURATION:" << target_duration << SRS_CONSTS_LF; + + // TODO: add #EXT-X-MAP:URI="init.mp4" for fmp4 + ss << "#EXT-X-MAP:URI=\""<< req_->stream << "/init.mp4\"" << SRS_CONSTS_LF; - if (true) { - SrsConfDirective* conf = _srs_config->get_vhost_on_hls_notify(req->vhost); + // write all segments + for (int i = 0; i < segments_->size(); i++) { + SrsHlsM4sSegment* segment = dynamic_cast(segments_->at(i)); - if (!conf) { - return err; + if (segment->is_sequence_header()) { + // #EXT-X-DISCONTINUITY\n + ss << "#EXT-X-DISCONTINUITY" << SRS_CONSTS_LF; + } + +#if 1 + if(hls_keys_ && ((segment->sequence_no % hls_fragments_per_key_) == 0)) { + char hexiv[33]; + srs_data_to_hex(hexiv, segment->iv, 16); + hexiv[32] = '\0'; + + string key_file = srs_path_build_stream(hls_key_file_, req_->vhost, req_->app, req_->stream); + key_file = srs_string_replace(key_file, "[seq]", srs_int2str(segment->sequence_no)); + + string key_path = key_file; + //if key_url is not set,only use the file name + if (!hls_key_url_.empty()) { + key_path = hls_key_url_ + key_file; + } + + ss << "#EXT-X-KEY:METHOD=SAMPLE-AES,URI=" << "\"" << key_path << "\",IV=0x" << hexiv << SRS_CONSTS_LF; } +#endif - hooks = conf->args; + // "#EXTINF:4294967295.208,\n" + ss.precision(3); + ss.setf(std::ios::fixed, std::ios::floatfield); + ss << "#EXTINF:" << srsu2msi(segment->duration()) / 1000.0 << ", no desc" << SRS_CONSTS_LF; + + // {file name}\n + // TODO get segment name in relative path. 
+ std::string seg_uri = segment->fullpath(); + if (true) { + std::stringstream stemp; + stemp << srsu2msi(segment->duration()); + seg_uri = srs_string_replace(seg_uri, "[duration]", stemp.str()); + } + //ss << segment->uri << SRS_CONSTS_LF; + ss << srs_path_basename(seg_uri) << SRS_CONSTS_LF; } - int nb_notify = _srs_config->get_vhost_hls_nb_notify(req->vhost); - for (int i = 0; i < (int)hooks.size(); i++) { - std::string url = hooks.at(i); - if ((err = SrsHttpHooks::on_hls_notify(cid, url, req, ts_url, nb_notify)) != srs_success) { - return srs_error_wrap(err, "callback on_hls_notify %s", url.c_str()); - } + // write m3u8 to writer. + std::string m3u8 = ss.str(); + if ((err = writer.write((char*)m3u8.c_str(), (int)m3u8.length(), NULL)) != srs_success) { + return srs_error_wrap(err, "hls: write m3u8"); } return err; } -string SrsDvrAsyncCallOnHlsNotify::to_string() -{ - return "on_hls_notify: " + ts_url; -} - SrsHlsMuxer::SrsHlsMuxer() { req = NULL; @@ -800,6 +1636,9 @@ srs_error_t SrsHlsMuxer::_refresh_m3u8(string m3u8_file) // #EXT-X-VERSION:3\n std::stringstream ss; ss << "#EXTM3U" << SRS_CONSTS_LF; + // TODO: for fmp4 set #EXT-X-VERSION:7, need support tag #EXT-X-MAP:URI="init.mp4", which + // at least version:5 + // DOC: https://developer.apple.com/documentation/http-live-streaming/about-the-ext-x-version-tag ss << "#EXT-X-VERSION:3" << SRS_CONSTS_LF; // #EXT-X-MEDIA-SEQUENCE:4294967295\n @@ -823,6 +1662,8 @@ srs_error_t SrsHlsMuxer::_refresh_m3u8(string m3u8_file) int target_duration = (int)ceil(srsu2msi(srs_max(max_duration, max_td)) / 1000.0); ss << "#EXT-X-TARGETDURATION:" << target_duration << SRS_CONSTS_LF; + + // TODO: add #EXT-X-MAP:URI="init.mp4" for fmp4 // write all segments for (int i = 0; i < segments->size(); i++) { @@ -875,10 +1716,22 @@ srs_error_t SrsHlsMuxer::_refresh_m3u8(string m3u8_file) return err; } +ISrsHlsController::ISrsHlsController() +{ +} + +ISrsHlsController::~ISrsHlsController() +{ +} + SrsHlsController::SrsHlsController() { tsmc 
= new SrsTsMessageCache(); muxer = new SrsHlsMuxer(); + + hls_dts_directly = false; + previous_audio_dts = 0; + aac_samples = 0; } SrsHlsController::~SrsHlsController() @@ -972,7 +1825,9 @@ srs_error_t SrsHlsController::on_publish(SrsRequest* req) } // This config item is used in SrsHls, we just log its value here. - bool hls_dts_directly = _srs_config->get_vhost_hls_dts_directly(req->vhost); + // If enabled, directly turn FLV timestamp to TS DTS. + // @remark It'll be reloaded automatically, because the origin hub will republish while reloading. + hls_dts_directly = _srs_config->get_vhost_hls_dts_directly(req->vhost); srs_trace("hls: win=%dms, frag=%dms, prefix=%s, path=%s, m3u8=%s, ts=%s, tdr=%.2f, aof=%.2f, floor=%d, clean=%d, waitk=%d, dispose=%dms, dts_directly=%d", srsu2msi(hls_window), srsu2msi(hls_fragment), entry_prefix.c_str(), path.c_str(), m3u8_file.c_str(), ts_file.c_str(), @@ -1000,7 +1855,7 @@ srs_error_t SrsHlsController::on_unpublish() return err; } -srs_error_t SrsHlsController::on_sequence_header() +srs_error_t SrsHlsController::on_sequence_header(SrsSharedPtrMessage* msg, SrsFormat* format) { // TODO: support discontinuity for the same stream // currently we reap and insert discontinity when encoder republish, @@ -1011,10 +1866,50 @@ srs_error_t SrsHlsController::on_sequence_header() return muxer->on_sequence_header(); } -srs_error_t SrsHlsController::write_audio(SrsAudioFrame* frame, int64_t pts) +srs_error_t SrsHlsController::write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format) { srs_error_t err = srs_success; + SrsAudioFrame* frame = format->audio; + + // Reset the aac samples counter when DTS jitter. + if (previous_audio_dts > shared_audio->timestamp) { + previous_audio_dts = shared_audio->timestamp; + aac_samples = 0; + } + + // The diff duration in ms between two FLV audio packets. 
+ int diff = ::abs((int)(shared_audio->timestamp - previous_audio_dts)); + previous_audio_dts = shared_audio->timestamp; + + // Guess the number of samples for each AAC frame. + // If samples is 1024, the sample-rate is 8000HZ, the diff should be 1024/8000s=128ms. + // If samples is 1024, the sample-rate is 44100HZ, the diff should be 1024/44100s=23ms. + // If samples is 2048, the sample-rate is 44100HZ, the diff should be 2048/44100s=46ms. + int nb_samples_per_frame = 0; + int guessNumberOfSamples = diff * srs_flv_srates[format->acodec->sound_rate] / 1000; + if (guessNumberOfSamples > 0) { + if (guessNumberOfSamples < 960) { + nb_samples_per_frame = 960; + } else if (guessNumberOfSamples < 1536) { + nb_samples_per_frame = 1024; + } else if (guessNumberOfSamples < 3072) { + nb_samples_per_frame = 2048; + } else { + nb_samples_per_frame = 4096; + } + } + + // Recalc the DTS by the samples of AAC. + aac_samples += nb_samples_per_frame; + int64_t dts = 90000 * aac_samples / srs_flv_srates[format->acodec->sound_rate]; + + // If directly turn FLV timestamp, overwrite the guessed DTS. + // @doc https://github.com/ossrs/srs/issues/1506#issuecomment-562063095 + if (hls_dts_directly) { + dts = shared_audio->timestamp * 90; + } + // Refresh the codec ASAP. if (muxer->latest_acodec() != frame->acodec()->id) { srs_trace("HLS: Switch audio codec %d(%s) to %d(%s)", muxer->latest_acodec(), srs_audio_codec_id2str(muxer->latest_acodec()).c_str(), @@ -1023,7 +1918,7 @@ srs_error_t SrsHlsController::write_audio(SrsAudioFrame* frame, int64_t pts) } // write audio to cache. - if ((err = tsmc->cache_audio(frame, pts)) != srs_success) { + if ((err = tsmc->cache_audio(frame, dts)) != srs_success) { return srs_error_wrap(err, "hls: cache audio"); } @@ -1046,7 +1941,7 @@ srs_error_t SrsHlsController::write_audio(SrsAudioFrame* frame, int64_t pts) // for pure audio, aggregate some frame to one. // TODO: FIXME: Check whether it's necessary. 
if (muxer->pure_audio() && tsmc->audio) { - if (pts - tsmc->audio->start_pts < SRS_CONSTS_HLS_PURE_AUDIO_AGGREGATE) { + if (dts - tsmc->audio->start_pts < SRS_CONSTS_HLS_PURE_AUDIO_AGGREGATE) { return err; } } @@ -1062,9 +1957,11 @@ srs_error_t SrsHlsController::write_audio(SrsAudioFrame* frame, int64_t pts) return err; } -srs_error_t SrsHlsController::write_video(SrsVideoFrame* frame, int64_t dts) +srs_error_t SrsHlsController::write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format) { srs_error_t err = srs_success; + SrsVideoFrame* frame = format->video; + int64_t dts = shared_video->timestamp * 90; // Refresh the codec ASAP. if (muxer->latest_vcodec() != frame->vcodec()->id) { @@ -1142,6 +2039,165 @@ srs_error_t SrsHlsController::reap_segment() return err; } +SrsHlsMp4Controller::SrsHlsMp4Controller() +{ + has_video_sh_ = false; + has_audio_sh_ = false; + + video_track_id_ = 1; + audio_track_id_ = 2; + + audio_dts_ = 0; + video_dts_ = 0; + + req_ = NULL; + muxer_ = new SrsHlsFmp4Muxer(); +} + +SrsHlsMp4Controller::~SrsHlsMp4Controller() +{ + srs_freep(muxer_); +} + +srs_error_t SrsHlsMp4Controller::initialize() +{ + srs_error_t err = srs_success; + if ((err = muxer_->initialize(video_track_id_, audio_track_id_)) != srs_success) { + return srs_error_wrap(err, "initialize SrsHlsFmp4Muxer"); + } + + return err; +} + +void SrsHlsMp4Controller::dispose() +{ + muxer_->dispose(); +} + +srs_error_t SrsHlsMp4Controller::on_publish(SrsRequest* req) +{ + srs_error_t err = srs_success; + + req_ = req; + std::string vhost = req->vhost; + std::string stream = req->stream; + std::string app = req->app; + + // get the hls m3u8 ts list entry prefix config + std::string entry_prefix = _srs_config->get_hls_entry_prefix(vhost); + // get the hls path config + std::string path = _srs_config->get_hls_path(vhost); + std::string m3u8_file = _srs_config->get_hls_m3u8_file(vhost); + std::string ts_file = _srs_config->get_hls_ts_file(vhost); + + if ((err = muxer_->on_publish(req)) 
!= srs_success) { + return srs_error_wrap(err, "muxer publish"); + } + + if ((err = muxer_->update_config(req)) != srs_success ) { + return srs_error_wrap(err, "hls: update config"); + } + + return err; +} + +srs_error_t SrsHlsMp4Controller::on_unpublish() +{ + srs_error_t err = srs_success; + req_ = NULL; + + if ((err = muxer_->segment_close()) != srs_success) { + return srs_error_wrap(err, "hls: segment close"); + } + + if ((err = muxer_->on_unpublish()) != srs_success) { + return srs_error_wrap(err, "muxer unpublish"); + } + + return err; +} + +srs_error_t SrsHlsMp4Controller::write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format) +{ + srs_error_t err = srs_success; + + // Ignore audio sequence header + if (format->is_aac_sequence_header() || format->is_mp3_sequence_header()) { + return err; + } + + audio_dts_ = shared_audio->timestamp; + // Hand the muxer's result back to the caller instead of silently dropping a failure. + err = muxer_->write_audio(shared_audio, format); + return err; +} + +srs_error_t SrsHlsMp4Controller::write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format) +{ + srs_error_t err = srs_success; + SrsVideoFrame* frame = format->video; + + // Refresh the codec ASAP. + if (muxer_->latest_vcodec() != frame->vcodec()->id) { + srs_trace("HLS: Switch video codec %d(%s) to %d(%s)", muxer_->latest_vcodec(), srs_video_codec_id2str(muxer_->latest_vcodec()).c_str(), + frame->vcodec()->id, srs_video_codec_id2str(frame->vcodec()->id).c_str()); + muxer_->set_latest_vcodec(frame->vcodec()->id); + } + + video_dts_ = shared_video->timestamp; + + // Hand the muxer's result back to the caller instead of silently dropping a failure. + err = muxer_->write_video(shared_video, format); + return err; +} + +srs_error_t SrsHlsMp4Controller::on_sequence_header(SrsSharedPtrMessage* msg, SrsFormat* format) +{ + srs_error_t err = srs_success; + + if (req_ == NULL) { + return srs_error_new(ERROR_HLS_NO_STREAM, "no req yet"); + } + + // TODO: on av sequence header, doing generate the init.mp4?
+ if (msg->is_video()) { + has_video_sh_ = true; + } + + if (msg->is_audio()) { + if (format->acodec->aac_extra_data.size() == 0) { + srs_trace("the audio codec's aac extra data is empty"); + return err; + } + + has_audio_sh_ = true; + } + + // Report a failed init.mp4 write to the caller instead of silently dropping the error. + err = muxer_->write_init_mp4(format, has_video_sh_, has_audio_sh_); + return err; +} + +int SrsHlsMp4Controller::sequence_no() +{ + return 0; +} + +std::string SrsHlsMp4Controller::ts_url() +{ + return ""; +} + +srs_utime_t SrsHlsMp4Controller::duration() +{ + return 0; +} + +int SrsHlsMp4Controller::deviation() +{ + return 0; +} + SrsHls::SrsHls() { req = NULL; @@ -1152,13 +2208,10 @@ SrsHls::SrsHls() unpublishing_ = false; async_reload_ = reloading_ = false; last_update_time = 0; - hls_dts_directly = false; - - previous_audio_dts = 0; - aac_samples = 0; jitter = new SrsRtmpJitter(); - controller = new SrsHlsController(); + // TODO: replace NULL by a dummy ISrsHlsController + controller = NULL; pprint = SrsPithyPrint::create_hls(); } @@ -1292,6 +2345,16 @@ srs_error_t SrsHls::initialize(SrsOriginHub* h, SrsRequest* r) hub = h; req = r; + + bool is_fmp4_enabled = _srs_config->get_hls_use_fmp4(r->vhost); + + if (!controller) { + if (is_fmp4_enabled) { + controller = new SrsHlsMp4Controller(); + } else { + controller = new SrsHlsController(); + } + } if ((err = controller->initialize()) != srs_success) { return srs_error_wrap(err, "controller initialize"); @@ -1319,10 +2382,6 @@ srs_error_t SrsHls::on_publish() if ((err = controller->on_publish(req)) != srs_success) { return srs_error_wrap(err, "hls: on publish"); } - - // If enabled, directly turn FLV timestamp to TS DTS. - // @remark It'll be reloaded automatically, because the origin hub will republish while reloading. - hls_dts_directly = _srs_config->get_vhost_hls_dts_directly(req->vhost); // if enabled, open the muxer.
enabled = true; @@ -1367,6 +2426,7 @@ srs_error_t SrsHls::on_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* forma // Ignore if no format->acodec, it means the codec is not parsed, or unknown codec. // @issue https://github.com/ossrs/srs/issues/1506#issuecomment-562079474 + // TODO: format->acodec is always not-nil, remove this check. if (!format->acodec) { return err; } @@ -1384,8 +2444,9 @@ srs_error_t SrsHls::on_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* forma // ignore sequence header srs_assert(format->audio); - if (acodec == SrsAudioCodecIdAAC && format->audio->aac_packet_type == SrsAudioAacFrameTraitSequenceHeader) { - return controller->on_sequence_header(); + // TODO: verify mp3 play by HLS. + if (format->is_aac_sequence_header() || format->is_mp3_sequence_header()) { + return controller->on_sequence_header(audio.get(), format); } // TODO: FIXME: config the jitter of HLS. @@ -1393,45 +2454,7 @@ srs_error_t SrsHls::on_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* forma return srs_error_wrap(err, "hls: jitter"); } - // Reset the aac samples counter when DTS jitter. - if (previous_audio_dts > audio->timestamp) { - previous_audio_dts = audio->timestamp; - aac_samples = 0; - } - - // The diff duration in ms between two FLV audio packets. - int diff = ::abs((int)(audio->timestamp - previous_audio_dts)); - previous_audio_dts = audio->timestamp; - - // Guess the number of samples for each AAC frame. - // If samples is 1024, the sample-rate is 8000HZ, the diff should be 1024/8000s=128ms. - // If samples is 1024, the sample-rate is 44100HZ, the diff should be 1024/44100s=23ms. - // If samples is 2048, the sample-rate is 44100HZ, the diff should be 2048/44100s=46ms. 
- int nb_samples_per_frame = 0; - int guessNumberOfSamples = diff * srs_flv_srates[format->acodec->sound_rate] / 1000; - if (guessNumberOfSamples > 0) { - if (guessNumberOfSamples < 960) { - nb_samples_per_frame = 960; - } else if (guessNumberOfSamples < 1536) { - nb_samples_per_frame = 1024; - } else if (guessNumberOfSamples < 3072) { - nb_samples_per_frame = 2048; - } else { - nb_samples_per_frame = 4096; - } - } - - // Recalc the DTS by the samples of AAC. - aac_samples += nb_samples_per_frame; - int64_t dts = 90000 * aac_samples / srs_flv_srates[format->acodec->sound_rate]; - - // If directly turn FLV timestamp, overwrite the guessed DTS. - // @doc https://github.com/ossrs/srs/issues/1506#issuecomment-562063095 - if (hls_dts_directly) { - dts = audio->timestamp * 90; - } - - if ((err = controller->write_audio(format->audio, dts)) != srs_success) { + if ((err = controller->write_audio(audio.get(), format)) != srs_success) { return srs_error_wrap(err, "hls: write audio"); } @@ -1469,9 +2492,11 @@ srs_error_t SrsHls::on_video(SrsSharedPtrMessage* shared_video, SrsFormat* forma return err; } - // ignore sequence header - if (format->video->avc_packet_type == SrsVideoAvcFrameTraitSequenceHeader) { - return controller->on_sequence_header(); + // ignore sequence header avc and hevc + // is avc|hevc|av1 sequence header check, but av1 packet already ignored above. so it's ok to use + // below method. + if (format->is_avc_sequence_header()) { + return controller->on_sequence_header(video.get(), format); } // TODO: FIXME: config the jitter of HLS. 
@@ -1479,8 +2504,7 @@ srs_error_t SrsHls::on_video(SrsSharedPtrMessage* shared_video, SrsFormat* forma return srs_error_wrap(err, "hls: jitter"); } - int64_t dts = video->timestamp * 90; - if ((err = controller->write_video(format->video, dts)) != srs_success) { + if ((err = controller->write_video(video.get(), format)) != srs_success) { return srs_error_wrap(err, "hls: write video"); } diff --git a/trunk/src/app/srs_app_hls.hpp b/trunk/src/app/srs_app_hls.hpp index e0866da252..4e8144a718 100644 --- a/trunk/src/app/srs_app_hls.hpp +++ b/trunk/src/app/srs_app_hls.hpp @@ -32,11 +32,14 @@ class SrsTsAacJitter; class SrsTsMessageCache; class SrsHlsSegment; class SrsTsContext; +class SrsMp4M2tsInitEncoder; +class SrsFmp4SegmentEncoder; // The wrapper of m3u8 segment from specification: // // 3.3.2. EXTINF // The EXTINF tag specifies the duration of a media segment. +// TODO: refactor this to support fmp4 segment. class SrsHlsSegment : public SrsFragment { public: @@ -56,11 +59,58 @@ class SrsHlsSegment : public SrsFragment SrsHlsSegment(SrsTsContext* c, SrsAudioCodecId ac, SrsVideoCodecId vc, SrsFileWriter* w); virtual ~SrsHlsSegment(); public: - void config_cipher(unsigned char* key,unsigned char* iv); + void config_cipher(unsigned char* key, unsigned char* iv); // replace the placeholder virtual srs_error_t rename(); }; +class SrsInitMp4Segment : public SrsFragment +{ +private: + SrsFileWriter* fw_; + SrsMp4M2tsInitEncoder* init_; + + unsigned char kid_[16]; + unsigned char const_iv_[16]; + uint8_t const_iv_size_; + +public: + SrsInitMp4Segment(); + virtual ~SrsInitMp4Segment(); + +public: + + virtual srs_error_t config_cipher(unsigned char* kid, unsigned char* const_iv, uint8_t const_iv_size); + // Write the init mp4 file, with the v_tid(video track id) and a_tid (audio track id). 
+ virtual srs_error_t write(SrsFormat* format, int v_tid, int a_tid); + + virtual srs_error_t write_video_only(SrsFormat* format, int v_tid); + virtual srs_error_t write_audio_only(SrsFormat* format, int a_tid); +private: + virtual srs_error_t init_encoder(); +}; + +// TODO: merge this code with SrsFragmentedMp4 in dash +class SrsHlsM4sSegment : public SrsFragment +{ +private: + SrsFileWriter* fw_; + SrsFmp4SegmentEncoder* enc_; +public: + // sequence number in m3u8. + int sequence_no; + // Will be saved in m3u8 file. + unsigned char iv[16]; +public: + SrsHlsM4sSegment(SrsFileWriter* fw); + virtual ~SrsHlsM4sSegment(); + + virtual srs_error_t initialize(int64_t time, uint32_t v_tid, uint32_t a_tid, int sequence_number, std::string m4s_path); + virtual void config_cipher(unsigned char* key, unsigned char* iv); + virtual srs_error_t write(SrsSharedPtrMessage* shared_msg, SrsFormat* format); + virtual srs_error_t reap(uint64_t& dts); +}; + // The hls async call: on_hls class SrsDvrAsyncCallOnHls : public ISrsAsyncCallTask { @@ -217,6 +267,155 @@ class SrsHlsMuxer virtual srs_error_t _refresh_m3u8(std::string m3u8_file); }; +// Mux the HLS stream(m3u8 and m4s files). +// Generally, the m3u8 muxer only provides methods to open/close segments, +// to flush video/audio, without any mechanisms. +// +// That is, user must use HlsCache, which will control the methods of muxer, +// and provides HLS mechanisms. +class SrsHlsFmp4Muxer +{ +private: + SrsRequest* req_; +private: + std::string hls_entry_prefix_; + std::string hls_path_; + std::string hls_m4s_file_; + bool hls_cleanup_; + bool hls_wait_keyframe_; + std::string m3u8_dir_; + double hls_aof_ratio_; + // TODO: FIXME: Use TBN 1000. + srs_utime_t hls_fragment_; + srs_utime_t hls_window_; + SrsAsyncCallWorker* async_; +private: + // Whether use floor algorithm for timestamp. + bool hls_ts_floor_; + // The deviation in piece to adjust the fragment to be + // bigger or smaller.
+ int deviation_ts_; + // The previous reap floor timestamp, + // used to detect the dup or jmp or ts. + int64_t accept_floor_ts_; + int64_t previous_floor_ts_; + bool init_mp4_ready_; +private: + // Whether encrypted or not + bool hls_keys_; + int hls_fragments_per_key_; + // The key file name + std::string hls_key_file_; + // The key file path + std::string hls_key_file_path_; + // The key file url + std::string hls_key_url_; + // The key and iv. + unsigned char key_[16]; + unsigned char kid_[16]; + unsigned char iv_[16]; + // The underlying file writer. + SrsFileWriter* writer_; +private: + int sequence_no_; + srs_utime_t max_td_; + std::string m3u8_; + std::string m3u8_url_; + int video_track_id_; + int audio_track_id_; + uint64_t video_dts_; +private: + // The available cached segments in m3u8. + SrsFragmentWindow* segments_; + // The current writing segment. + SrsHlsM4sSegment* current_; + +private: + // Latest audio codec, parsed from stream. + SrsAudioCodecId latest_acodec_; + // Latest video codec, parsed from stream. + SrsVideoCodecId latest_vcodec_; +public: + SrsHlsFmp4Muxer(); + virtual ~SrsHlsFmp4Muxer(); +public: + virtual void dispose(); +public: + virtual int sequence_no(); + virtual std::string ts_url(); + virtual srs_utime_t duration(); + virtual int deviation(); +public: + SrsAudioCodecId latest_acodec(); + void set_latest_acodec(SrsAudioCodecId v); + SrsVideoCodecId latest_vcodec(); + void set_latest_vcodec(SrsVideoCodecId v); +public: + // Initialize the hls muxer. + virtual srs_error_t initialize(int v_tid, int a_tid); + // When publish or unpublish stream.
+ virtual srs_error_t on_publish(SrsRequest* req); + + virtual srs_error_t write_init_mp4(SrsFormat* format, bool has_video, bool has_audio); + virtual srs_error_t write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format); + virtual srs_error_t write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format); + + virtual srs_error_t on_unpublish(); + // When publish, update the config for muxer. + virtual srs_error_t update_config(SrsRequest* r); + // Open a new segment(a new ts file) + virtual srs_error_t segment_open(srs_utime_t basetime); + virtual srs_error_t on_sequence_header(); + // Whether segment overflow, + // that is whether the current segment duration>=(the segment in config) + virtual bool is_segment_overflow(); + // Whether wait keyframe to reap the ts. + virtual bool wait_keyframe(); + // Whether segment absolutely overflow, for pure audio to reap segment, + // that is whether the current segment duration>=2*(the segment in config) + virtual bool is_segment_absolutely_overflow(); +public: + // Whether current hls muxer is pure audio mode. +// virtual bool pure_audio(); +// virtual srs_error_t flush_audio(SrsTsMessageCache* cache); +// virtual srs_error_t flush_video(SrsTsMessageCache* cache); + // When flushing video or audio, we update the duration. But, we should also update the + // duration before closing the segment. Keep in mind that it's fine to update the duration + // several times using the same dts timestamp. + void update_duration(uint64_t dts); + // Close segment(ts). 
+ virtual srs_error_t segment_close(); +private: + virtual srs_error_t do_segment_close(); + virtual srs_error_t write_hls_key(); + virtual srs_error_t refresh_m3u8(); + virtual srs_error_t _refresh_m3u8(std::string m3u8_file); +}; + +// The base class for HLS controller +class ISrsHlsController +{ +public: + ISrsHlsController(); + virtual ~ISrsHlsController(); + +public: + virtual srs_error_t initialize() = 0; + virtual void dispose() = 0; + // When publish or unpublish stream. + virtual srs_error_t on_publish(SrsRequest* req) = 0; + virtual srs_error_t on_unpublish() = 0; + + virtual srs_error_t write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format) = 0; + virtual srs_error_t write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format) = 0; + + virtual srs_error_t on_sequence_header(SrsSharedPtrMessage* msg, SrsFormat* format) = 0; + virtual int sequence_no() = 0; + virtual std::string ts_url() = 0; + virtual srs_utime_t duration() = 0; + virtual int deviation() = 0; +}; + // The hls stream cache, // use to cache hls stream and flush to hls muxer. // @@ -232,14 +431,23 @@ class SrsHlsMuxer // when timestamp convert to flv tbn, it will loose precise, // so we must gather audio frame together, and recalc the timestamp @see SrsTsAacJitter, // we use a aac jitter to correct the audio pts. -class SrsHlsController +class SrsHlsController : public ISrsHlsController { private: // The HLS muxer to reap ts and m3u8. // The TS is cached to SrsTsMessageCache then flush to ts segment. SrsHlsMuxer* muxer; // The TS cache + // TODO: support both fmp4 and ts format SrsTsMessageCache* tsmc; + + // If the diff=dts-previous_audio_dts is about 23, + // that's the AAC samples is 1024, and we use the samples to calc the dts. + int64_t previous_audio_dts; + // The total aac samples. + uint64_t aac_samples; + // Whether directly turn FLV timestamp to TS DTS. 
+ bool hls_dts_directly; public: SrsHlsController(); virtual ~SrsHlsController(); @@ -258,11 +466,11 @@ class SrsHlsController // must write a #EXT-X-DISCONTINUITY to m3u8. // @see: hls-m3u8-draft-pantos-http-live-streaming-12.txt // @see: 3.4.11. EXT-X-DISCONTINUITY - virtual srs_error_t on_sequence_header(); + virtual srs_error_t on_sequence_header(SrsSharedPtrMessage* shared_audio, SrsFormat* format); // write audio to cache, if need to flush, flush to muxer. - virtual srs_error_t write_audio(SrsAudioFrame* frame, int64_t pts); + virtual srs_error_t write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format); // write video to muxer. - virtual srs_error_t write_video(SrsVideoFrame* frame, int64_t dts); + virtual srs_error_t write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format); private: // Reopen the muxer for a new hls segment, // close current segment, open a new segment, @@ -271,12 +479,51 @@ class SrsHlsController virtual srs_error_t reap_segment(); }; -// Transmux RTMP stream to HLS(m3u8 and ts). +class SrsHlsMp4Controller : public ISrsHlsController +{ +private: + bool has_video_sh_; + bool has_audio_sh_; + + int video_track_id_; + int audio_track_id_; + + // Current audio dts. + uint64_t audio_dts_; + // Current video dts. + uint64_t video_dts_; + + SrsRequest* req_; + + SrsHlsFmp4Muxer* muxer_; + +public: + SrsHlsMp4Controller(); + virtual ~SrsHlsMp4Controller(); + +public: + virtual srs_error_t initialize(); + virtual void dispose(); + // When publish or unpublish stream. 
+ virtual srs_error_t on_publish(SrsRequest* req); + virtual srs_error_t on_unpublish(); + virtual srs_error_t write_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format); + virtual srs_error_t write_video(SrsSharedPtrMessage* shared_video, SrsFormat* format); + + virtual srs_error_t on_sequence_header(SrsSharedPtrMessage* shared_audio, SrsFormat* format); + virtual int sequence_no(); + virtual std::string ts_url(); + virtual srs_utime_t duration(); + virtual int deviation(); +}; + + +// Transmux RTMP stream to HLS(m3u8 and ts,fmp4). // TODO: FIXME: add utest for hls. class SrsHls { private: - SrsHlsController* controller; + ISrsHlsController* controller; private: SrsRequest* req; // Whether the HLS is enabled. @@ -290,14 +537,7 @@ class SrsHls bool reloading_; // To detect heartbeat and dispose it if configured. srs_utime_t last_update_time; -private: - // If the diff=dts-previous_audio_dts is about 23, - // that's the AAC samples is 1024, and we use the samples to calc the dts. - int64_t previous_audio_dts; - // The total aac samples. - uint64_t aac_samples; - // Whether directly turn FLV timestamp to TS DTS. - bool hls_dts_directly; + private: SrsOriginHub* hub; SrsRtmpJitter* jitter; diff --git a/trunk/src/app/srs_app_http_static.hpp b/trunk/src/app/srs_app_http_static.hpp index d4682370d8..9c679acf50 100644 --- a/trunk/src/app/srs_app_http_static.hpp +++ b/trunk/src/app/srs_app_http_static.hpp @@ -74,6 +74,7 @@ class SrsVodStream : public SrsHttpFileServer virtual srs_error_t serve_mp4_stream(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath, int64_t start, int64_t end); // Support HLS streaming with pseudo session id. 
virtual srs_error_t serve_m3u8_ctx(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); + // the ts file including: .ts .m4s init.mp4 virtual srs_error_t serve_ts_ctx(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); }; diff --git a/trunk/src/kernel/srs_kernel_codec.hpp b/trunk/src/kernel/srs_kernel_codec.hpp index a1462d3328..9d984a1aa6 100644 --- a/trunk/src/kernel/srs_kernel_codec.hpp +++ b/trunk/src/kernel/srs_kernel_codec.hpp @@ -1298,6 +1298,7 @@ class SrsVideoFrame : public SrsFrame { public: // video specified + // TODO: H.264 and H.265 reused AvcFrameType and AvcFrameTrait? SrsVideoAvcFrameType frame_type; SrsVideoAvcFrameTrait avc_packet_type; // whether sample_units contains IDR frame. @@ -1361,6 +1362,7 @@ class SrsFormat public: virtual bool is_aac_sequence_header(); virtual bool is_mp3_sequence_header(); + // TODO: is avc|hevc|av1 sequence header virtual bool is_avc_sequence_header(); private: // Demux the video packet in H.264 codec. diff --git a/trunk/src/kernel/srs_kernel_mp4.cpp b/trunk/src/kernel/srs_kernel_mp4.cpp index b4d597f7ee..d72dd7c979 100644 --- a/trunk/src/kernel/srs_kernel_mp4.cpp +++ b/trunk/src/kernel/srs_kernel_mp4.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -759,15 +760,8 @@ void SrsMp4MovieFragmentBox::set_mfhd(SrsMp4MovieFragmentHeaderBox* v) boxes.push_back(v); } -SrsMp4TrackFragmentBox* SrsMp4MovieFragmentBox::traf() +void SrsMp4MovieFragmentBox::add_traf(SrsMp4TrackFragmentBox* v) { - SrsMp4Box* box = get(SrsMp4BoxTypeTRAF); - return dynamic_cast(box); -} - -void SrsMp4MovieFragmentBox::set_traf(SrsMp4TrackFragmentBox* v) -{ - remove(SrsMp4BoxTypeTRAF); boxes.push_back(v); } @@ -1647,15 +1641,8 @@ SrsMp4MovieExtendsBox::~SrsMp4MovieExtendsBox() { } -SrsMp4TrackExtendsBox* SrsMp4MovieExtendsBox::trex() -{ - SrsMp4Box* box = get(SrsMp4BoxTypeTREX); - return dynamic_cast(box); -} - -void SrsMp4MovieExtendsBox::set_trex(SrsMp4TrackExtendsBox* v) +void 
SrsMp4MovieExtendsBox::add_trex(SrsMp4TrackExtendsBox* v) { - remove(SrsMp4BoxTypeTREX); boxes.push_back(v); } @@ -4790,843 +4777,1499 @@ stringstream& SrsMp4SegmentIndexBox::dumps_detail(stringstream& ss, SrsMp4DumpCo return ss; } -SrsMp4Sample::SrsMp4Sample() +SrsMp4SampleAuxiliaryInfoSizeBox::SrsMp4SampleAuxiliaryInfoSizeBox() { - type = SrsFrameTypeForbidden; - offset = 0; - index = 0; - dts = pts = 0; - nb_data = 0; - data = NULL; - frame_type = SrsVideoAvcFrameTypeForbidden; - tbn = 0; - adjust = 0; + type = SrsMp4BoxTypeSAIZ; } -SrsMp4Sample::~SrsMp4Sample() +SrsMp4SampleAuxiliaryInfoSizeBox::~SrsMp4SampleAuxiliaryInfoSizeBox() { - srs_freepa(data); } -uint32_t SrsMp4Sample::dts_ms() +int SrsMp4SampleAuxiliaryInfoSizeBox::nb_header() { - return (uint32_t)(dts * 1000 / tbn) + adjust; -} + int size = SrsMp4FullBox::nb_header(); -uint32_t SrsMp4Sample::pts_ms() -{ - return (uint32_t)(pts * 1000 / tbn) + adjust; -} + if (flags & 0x01) { + size += 8; // add sizeof(aux_info_type) + sizeof(aux_info_type_parameter); + } -SrsMp4SampleManager::SrsMp4SampleManager() -{ -} + size += 1; // sizeof(default_sample_info_size); + size += 4; // sizeof(sample_count); -SrsMp4SampleManager::~SrsMp4SampleManager() -{ - vector::iterator it; - for (it = samples.begin(); it != samples.end(); ++it) { - SrsMp4Sample* sample = *it; - srs_freep(sample); + if (default_sample_info_size == 0) { + size += sample_info_sizes.size(); } - samples.clear(); + + return size; } -srs_error_t SrsMp4SampleManager::load(SrsMp4MovieBox* moov) +srs_error_t SrsMp4SampleAuxiliaryInfoSizeBox::encode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - - map tses; - - // Load samples from moov, merge to temp samples. 
- if ((err = do_load(tses, moov)) != srs_success) { - map::iterator it; - for (it = tses.begin(); it != tses.end(); ++it) { - SrsMp4Sample* sample = it->second; - srs_freep(sample); - } - return srs_error_wrap(err, "load mp4"); + if ((err = SrsMp4FullBox::encode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); } - - // Dumps temp samples. - // Adjust the sequence diff. - int32_t maxp = 0; - int32_t maxn = 0; - if (true) { - SrsMp4Sample* pvideo = NULL; - map::iterator it; - for (it = tses.begin(); it != tses.end(); ++it) { - SrsMp4Sample* sample = it->second; - samples.push_back(sample); - - if (sample->type == SrsFrameTypeVideo) { - pvideo = sample; - } else if (pvideo) { - int32_t diff = sample->dts_ms() - pvideo->dts_ms(); - if (diff > 0) { - maxp = srs_max(maxp, diff); - } else { - maxn = srs_min(maxn, diff); - } - pvideo = NULL; - } - } + + if (flags & 0x01) { + buf->write_4bytes(aux_info_type); + buf->write_4bytes(aux_info_type_parameter); } - - // Adjust when one of maxp and maxn is zero, - // that means we can adjust by add maxn or sub maxp, - // notice that maxn is negative and maxp is positive. 
- if (maxp * maxn == 0 && maxp + maxn != 0) { - map::iterator it; - for (it = tses.begin(); it != tses.end(); ++it) { - SrsMp4Sample* sample = it->second; - if (sample->type == SrsFrameTypeAudio) { - sample->adjust = 0 - maxp - maxn; - } + + buf->write_1bytes(default_sample_info_size); + + if (default_sample_info_size == 0) { + buf->write_4bytes(sample_info_sizes.size()); + vector::iterator it; + for (it = sample_info_sizes.begin(); it != sample_info_sizes.end(); ++it) + { + buf->write_1bytes(*it); } + } else { + buf->write_4bytes(sample_count); } return err; } -SrsMp4Sample* SrsMp4SampleManager::at(uint32_t index) +srs_error_t SrsMp4SampleAuxiliaryInfoSizeBox::decode_header(SrsBuffer* buf) { - if (index < samples.size()) { - return samples.at(index); + srs_error_t err = srs_success; + + if ((err = SrsMp4FullBox::decode_header(buf)) != srs_success) { + return srs_error_wrap(err, "decode header"); } - return NULL; + + if (flags & 0x01) { + aux_info_type = buf->read_4bytes(); + aux_info_type_parameter = buf->read_4bytes(); + } + + default_sample_info_size = buf->read_1bytes(); + sample_count = buf->read_4bytes(); + + if (default_sample_info_size == 0) { + for (int i = 0; i < sample_count; i++) { + sample_info_sizes.push_back(buf->read_1bytes()); + } + } + + return err; } -void SrsMp4SampleManager::append(SrsMp4Sample* sample) +std::stringstream& SrsMp4SampleAuxiliaryInfoSizeBox::dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc) { - samples.push_back(sample); + ss << "default_sample_info_size=" << (int)default_sample_info_size << ", sample_count=" << sample_count; // NOTE(review): cast assumes a char-sized field, which would otherwise stream as a raw character; confirm against the header. + return ss; } -srs_error_t SrsMp4SampleManager::write(SrsMp4MovieBox* moov) +SrsMp4SampleAuxiliaryInfoOffsetBox::SrsMp4SampleAuxiliaryInfoOffsetBox() +{ + type = SrsMp4BoxTypeSAIO; +} + +SrsMp4SampleAuxiliaryInfoOffsetBox::~SrsMp4SampleAuxiliaryInfoOffsetBox() +{ +} + +int SrsMp4SampleAuxiliaryInfoOffsetBox::nb_header() +{ + int size = SrsMp4FullBox::nb_header(); + + if (flags & 0x01) { + size += 8; //
sizeof(aux_info_type) + sizeof(aux_info_type_parameter); + } + + size += 4; // sizeof(entry_count); + if (version == 0) { + size += offsets.size() * 4; + } else { + size += offsets.size() * 8; + } + + return size; +} + +srs_error_t SrsMp4SampleAuxiliaryInfoOffsetBox::encode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - - SrsMp4TrackBox* vide = moov->video(); - if (vide) { - bool has_cts = false; - vector::iterator it; - for (it = samples.begin(); it != samples.end(); ++it) { - SrsMp4Sample* sample = *it; - if (sample->dts != sample->pts && sample->type == SrsFrameTypeVideo) { - has_cts = true; - break; - } - } - - SrsMp4SampleTableBox* stbl = vide->stbl(); - - SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); - stbl->set_stts(stts); - - SrsMp4SyncSampleBox* stss = new SrsMp4SyncSampleBox(); - stbl->set_stss(stss); - - SrsMp4CompositionTime2SampleBox* ctts = NULL; - if (has_cts) { - ctts = new SrsMp4CompositionTime2SampleBox(); - stbl->set_ctts(ctts); - } - - SrsMp4Sample2ChunkBox* stsc = new SrsMp4Sample2ChunkBox(); - stbl->set_stsc(stsc); - - SrsMp4SampleSizeBox* stsz = new SrsMp4SampleSizeBox(); - stbl->set_stsz(stsz); - - SrsMp4FullBox* co = NULL; - // When sample offset less than UINT32_MAX, we use stco(support 32bit offset) box to save storage space. - if (samples.empty() || (*samples.rbegin())->offset < UINT32_MAX) { - // stco support 32bit offset. - co = new SrsMp4ChunkOffsetBox(); - stbl->set_stco(static_cast(co)); - } else { - // When sample offset bigger than UINT32_MAX, we use co64(support 64bit offset) box to avoid overflow. 
- co = new SrsMp4ChunkLargeOffsetBox(); - stbl->set_co64(static_cast(co)); - } - if ((err = write_track(SrsFrameTypeVideo, stts, stss, ctts, stsc, stsz, co)) != srs_success) { - return srs_error_wrap(err, "write vide track"); - } + if ((err = SrsMp4FullBox::encode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); } - - SrsMp4TrackBox* soun = moov->audio(); - if (soun) { - SrsMp4SampleTableBox* stbl = soun->stbl(); - - SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); - stbl->set_stts(stts); - - SrsMp4SyncSampleBox* stss = NULL; - SrsMp4CompositionTime2SampleBox* ctts = NULL; - - SrsMp4Sample2ChunkBox* stsc = new SrsMp4Sample2ChunkBox(); - stbl->set_stsc(stsc); - - SrsMp4SampleSizeBox* stsz = new SrsMp4SampleSizeBox(); - stbl->set_stsz(stsz); - - SrsMp4FullBox* co = NULL; - if (samples.empty() || (*samples.rbegin())->offset < UINT32_MAX) { - co = new SrsMp4ChunkOffsetBox(); - stbl->set_stco(static_cast(co)); + + if (flags & 0x01) { + buf->write_4bytes(aux_info_type); + buf->write_4bytes(aux_info_type_parameter); + } + + buf->write_4bytes(offsets.size()); + vector::iterator it; + for (it = offsets.begin(); it != offsets.end(); ++it) + { + if (version == 0) { + buf->write_4bytes(*it); } else { - co = new SrsMp4ChunkLargeOffsetBox(); - stbl->set_co64(static_cast(co)); - } - - if ((err = write_track(SrsFrameTypeAudio, stts, stss, ctts, stsc, stsz, co)) != srs_success) { - return srs_error_wrap(err, "write soun track"); + buf->write_8bytes(*it); } } - + return err; } -srs_error_t SrsMp4SampleManager::write(SrsMp4MovieFragmentBox* moof, uint64_t dts) +srs_error_t SrsMp4SampleAuxiliaryInfoOffsetBox::decode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - SrsMp4TrackFragmentBox* traf = moof->traf(); - SrsMp4TrackFragmentRunBox* trun = traf->trun(); - - trun->flags = SrsMp4TrunFlagsDataOffset | SrsMp4TrunFlagsSampleDuration - | SrsMp4TrunFlagsSampleSize | SrsMp4TrunFlagsSampleFlag | SrsMp4TrunFlagsSampleCtsOffset; + 
if ((err = SrsMp4FullBox::decode_header(buf)) != srs_success) { + return srs_error_wrap(err, "decode header"); + } - SrsMp4Sample* previous = NULL; - - vector::iterator it; - for (it = samples.begin(); it != samples.end(); ++it) { - SrsMp4Sample* sample = *it; - SrsMp4TrunEntry* entry = new SrsMp4TrunEntry(trun); - - if (!previous) { - previous = sample; - entry->sample_flags = 0x02000000; - } else { - entry->sample_flags = 0x01000000; - } - - vector::iterator iter = (it + 1); - if (iter == samples.end()) { - entry->sample_duration = dts - sample->dts; + if (flags & 0x01) { + aux_info_type = buf->read_4bytes(); + aux_info_type_parameter = buf->read_4bytes(); + } + + uint32_t entry_count = buf->read_4bytes(); + for (int i = 0; i < entry_count; i++) + { + if (version == 0) { + offsets.push_back(buf->read_4bytes()); } else { - entry->sample_duration = (*iter)->dts - sample->dts; - } - - entry->sample_size = sample->nb_data; - entry->sample_composition_time_offset = (int64_t)(sample->pts - sample->dts); - if (entry->sample_composition_time_offset < 0) { - trun->version = 1; + offsets.push_back(buf->read_8bytes()); } - - trun->entries.push_back(entry); + } return err; } -srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track, - SrsMp4DecodingTime2SampleBox* stts, SrsMp4SyncSampleBox* stss, SrsMp4CompositionTime2SampleBox* ctts, - SrsMp4Sample2ChunkBox* stsc, SrsMp4SampleSizeBox* stsz, SrsMp4FullBox* co) +std::stringstream& SrsMp4SampleAuxiliaryInfoOffsetBox::dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc) { - srs_error_t err = srs_success; - - SrsMp4SttsEntry stts_entry; - vector stts_entries; - - SrsMp4CttsEntry ctts_entry; - vector ctts_entries; - - vector stsz_entries; - vector co_entries; - vector stss_entries; - - SrsMp4Sample* previous = NULL; - vector::iterator it; - for (it = samples.begin(); it != samples.end(); ++it) { - SrsMp4Sample* sample = *it; - if (sample->type != track) { - continue; - } - - stsz_entries.push_back(sample->nb_data); - 
co_entries.push_back((uint64_t)sample->offset); - - if (sample->frame_type == SrsVideoAvcFrameTypeKeyFrame) { - stss_entries.push_back(sample->index + 1); - } - - if (stts) { - if (previous) { - uint32_t delta = (uint32_t)(sample->dts - previous->dts); - if (stts_entry.sample_delta == 0 || stts_entry.sample_delta == delta) { - stts_entry.sample_delta = delta; - stts_entry.sample_count++; - } else { - stts_entries.push_back(stts_entry); - stts_entry.sample_count = 1; - stts_entry.sample_delta = delta; - } - } else { - // The first sample always in the STTS table. - stts_entry.sample_count++; - } - } - - if (ctts) { - int64_t offset = sample->pts - sample->dts; - if (offset < 0) { - ctts->version = 0x01; - } - if (ctts_entry.sample_count == 0 || ctts_entry.sample_offset == offset) { - ctts_entry.sample_count++; - } else { - ctts_entries.push_back(ctts_entry); - ctts_entry.sample_offset = offset; - ctts_entry.sample_count = 1; - } - } - - previous = sample; - } + ss << "entry_count=" << offsets.size(); + return ss; +} + +SrsMp4SubSampleEncryptionInfo::SrsMp4SubSampleEncryptionInfo() +{ + bytes_of_clear_data = 0; + bytes_of_protected_data = 0; +} + +SrsMp4SubSampleEncryptionInfo::~SrsMp4SubSampleEncryptionInfo() +{ +} + +uint64_t SrsMp4SubSampleEncryptionInfo::nb_bytes() +{ + // sizeof(bytes_of_clear_data) + sizeof(bytes_of_protected_data); + return 6; +} + +srs_error_t SrsMp4SubSampleEncryptionInfo::encode(SrsBuffer* buf) +{ + buf->write_2bytes(bytes_of_clear_data); + buf->write_4bytes(bytes_of_protected_data); + + return srs_success; +} + +srs_error_t SrsMp4SubSampleEncryptionInfo::decode(SrsBuffer* buf) +{ + bytes_of_clear_data = buf->read_2bytes(); + bytes_of_protected_data = buf->read_4bytes(); + + return srs_success; +} + +std::stringstream& SrsMp4SubSampleEncryptionInfo::dumps(std::stringstream& ss, SrsMp4DumpContext dc) +{ + ss << "bytes_of_clear_data=" << bytes_of_clear_data << ", bytes_of_protected_data=" << bytes_of_protected_data; + return ss; +} + 
+SrsMp4SampleEncryptionEntry::SrsMp4SampleEncryptionEntry(SrsMp4FullBox* senc, uint8_t per_sample_iv_size) +{ + senc_ = senc; + srs_assert(per_sample_iv_size == 0 || per_sample_iv_size == 8 || per_sample_iv_size == 16); + per_sample_iv_size_ = per_sample_iv_size; + iv_ = (uint8_t*) malloc(per_sample_iv_size); +} + +SrsMp4SampleEncryptionEntry::~SrsMp4SampleEncryptionEntry() +{ + srs_freep(iv_); +} + +srs_error_t SrsMp4SampleEncryptionEntry::set_iv(uint8_t* iv, uint8_t iv_size) +{ + srs_assert(iv_size == per_sample_iv_size_); + memcpy(iv_, iv, iv_size); - if (stts && stts_entry.sample_count) { - stts_entries.push_back(stts_entry); + return srs_success; +} + +uint64_t SrsMp4SampleEncryptionEntry::nb_bytes() +{ + uint64_t size = per_sample_iv_size_; + if (senc_->flags & SrsMp4CencSampleEncryptionUseSubSample) { + size += 2; // size of subsample_count + size += subsample_infos.size() * 6; } - if (ctts && ctts_entry.sample_count) { - ctts_entries.push_back(ctts_entry); + return size; +} + +srs_error_t SrsMp4SampleEncryptionEntry::encode(SrsBuffer* buf) +{ + if (per_sample_iv_size_ != 0) { + buf->write_bytes((char*) iv_, per_sample_iv_size_); } - - if (stts && !stts_entries.empty()) { - stts->entries = stts_entries; + + if (senc_->flags & SrsMp4CencSampleEncryptionUseSubSample) { + buf->write_2bytes(subsample_infos.size()); + + vector::iterator it; + for (it = subsample_infos.begin(); it != subsample_infos.end(); ++it) { + (*it).encode(buf); + } } - - if (ctts && !ctts_entries.empty()) { - ctts->entries = ctts_entries; + + return srs_success; +} + +srs_error_t SrsMp4SampleEncryptionEntry::decode(SrsBuffer* buf) +{ + if (per_sample_iv_size_ > 0) { + buf->read_bytes((char*)iv_, per_sample_iv_size_); } - - if (stsc) { - stsc->entry_count = 1; - stsc->entries = new SrsMp4StscEntry[1]; - - SrsMp4StscEntry& v = stsc->entries[0]; - v.first_chunk = v.sample_description_index = v.samples_per_chunk = 1; + + if (senc_->flags & SrsMp4CencSampleEncryptionUseSubSample) { + uint16_t 
subsample_count = buf->read_2bytes(); + for (uint16_t i = 0; i < subsample_count; i++) { + SrsMp4SubSampleEncryptionInfo info; + info.decode(buf); + subsample_infos.push_back(info); + } } + return srs_success; +} + +std::stringstream& SrsMp4SampleEncryptionEntry::dumps(std::stringstream& ss, SrsMp4DumpContext dc) +{ + // TODO: dump what? + ss << "iv=" << iv_ << endl; - if (stsz && !stsz_entries.empty()) { - stsz->sample_size = 0; - stsz->sample_count = (uint32_t)stsz_entries.size(); - stsz->entry_sizes = new uint32_t[stsz->sample_count]; - for (int i = 0; i < (int)stsz->sample_count; i++) { - stsz->entry_sizes[i] = stsz_entries.at(i); - } + vector::iterator it; + for (it = subsample_infos.begin(); it != subsample_infos.end(); ++it) { + (*it).dumps(ss, dc); + ss << endl; } - if (!co_entries.empty()) { - SrsMp4ChunkOffsetBox* stco = dynamic_cast(co); - SrsMp4ChunkLargeOffsetBox* co64 = dynamic_cast(co); + return ss; +} - if (stco) { - stco->entry_count = (uint32_t)co_entries.size(); - stco->entries = new uint32_t[stco->entry_count]; - for (int i = 0; i < (int)stco->entry_count; i++) { - stco->entries[i] = co_entries.at(i); - } - } else if (co64) { - co64->entry_count = (uint32_t)co_entries.size(); - co64->entries = new uint64_t[co64->entry_count]; - for (int i = 0; i < (int)co64->entry_count; i++) { - co64->entries[i] = co_entries.at(i); - } - } +SrsMp4SampleEncryptionBox::SrsMp4SampleEncryptionBox(uint8_t per_sample_iv_size) +{ + version = 0; + flags = SrsMp4CencSampleEncryptionUseSubSample; + type = SrsMp4BoxTypeSENC; + srs_assert(per_sample_iv_size == 0 || per_sample_iv_size == 8 || per_sample_iv_size == 16); + per_sample_iv_size_ = per_sample_iv_size; +} + +SrsMp4SampleEncryptionBox::~SrsMp4SampleEncryptionBox() +{ + vector::iterator it; + for (it = entries.begin(); it != entries.end(); it++) + { + SrsMp4SampleEncryptionEntry* entry = *it; + srs_freep(entry); } - - if (stss && !stss_entries.empty()) { - stss->entry_count = (uint32_t)stss_entries.size(); - 
stss->sample_numbers = new uint32_t[stss->entry_count]; - for (int i = 0; i < (int)stss->entry_count; i++) { - stss->sample_numbers[i] = stss_entries.at(i); - } + entries.clear(); +} + +int SrsMp4SampleEncryptionBox::nb_header() +{ + int size = SrsMp4FullBox::nb_header() + 4; + + vector::iterator it; + for (it = entries.begin(); it < entries.end(); it++) + { + size += (*it)->nb_bytes(); } - return err; + return size; } -srs_error_t SrsMp4SampleManager::do_load(map& tses, SrsMp4MovieBox* moov) +srs_error_t SrsMp4SampleEncryptionBox::encode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - - SrsMp4TrackBox* vide = moov->video(); - if (vide) { - SrsMp4MediaHeaderBox* mdhd = vide->mdhd(); - SrsMp4TrackType tt = vide->track_type(); - SrsMp4ChunkOffsetBox* stco = vide->stco(); - SrsMp4SampleSizeBox* stsz = vide->stsz(); - SrsMp4Sample2ChunkBox* stsc = vide->stsc(); - SrsMp4DecodingTime2SampleBox* stts = vide->stts(); - // The composition time to sample table is optional and must only be present if DT and CT differ for any samples. - SrsMp4CompositionTime2SampleBox* ctts = vide->ctts(); - // If the sync sample box is not present, every sample is a sync sample. 
- SrsMp4SyncSampleBox* stss = vide->stss(); - - if (!mdhd || !stco || !stsz || !stsc || !stts) { - return srs_error_new(ERROR_MP4_ILLEGAL_TRACK, "illegal track, empty mdhd/stco/stsz/stsc/stts, type=%d", tt); - } - if ((err = load_trak(tses, SrsFrameTypeVideo, mdhd, stco, stsz, stsc, stts, ctts, stss)) != srs_success) { - return srs_error_wrap(err, "load vide track"); - } + if ((err = SrsMp4FullBox::encode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); } - - SrsMp4TrackBox* soun = moov->audio(); - if (soun) { - SrsMp4MediaHeaderBox* mdhd = soun->mdhd(); - SrsMp4TrackType tt = soun->track_type(); - SrsMp4ChunkOffsetBox* stco = soun->stco(); - SrsMp4SampleSizeBox* stsz = soun->stsz(); - SrsMp4Sample2ChunkBox* stsc = soun->stsc(); - SrsMp4DecodingTime2SampleBox* stts = soun->stts(); - - if (!mdhd || !stco || !stsz || !stsc || !stts) { - return srs_error_new(ERROR_MP4_ILLEGAL_TRACK, "illegal track, empty mdhd/stco/stsz/stsc/stts, type=%d", tt); - } - - if ((err = load_trak(tses, SrsFrameTypeAudio, mdhd, stco, stsz, stsc, stts, NULL, NULL)) != srs_success) { - return srs_error_wrap(err, "load soun track"); - } + + buf->write_4bytes(entries.size()); + vector::iterator it; + for (it = entries.begin(); it != entries.end(); it++) + { + (*it)->encode(buf); } - + return err; } -srs_error_t SrsMp4SampleManager::load_trak(map& tses, SrsFrameType tt, - SrsMp4MediaHeaderBox* mdhd, SrsMp4ChunkOffsetBox* stco, SrsMp4SampleSizeBox* stsz, SrsMp4Sample2ChunkBox* stsc, - SrsMp4DecodingTime2SampleBox* stts, SrsMp4CompositionTime2SampleBox* ctts, SrsMp4SyncSampleBox* stss) +srs_error_t SrsMp4SampleEncryptionBox::decode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - // Samples per chunk. - stsc->initialize_counter(); - - // DTS box. 
- if ((err = stts->initialize_counter()) != srs_success) { - return srs_error_wrap(err, "stts init counter"); + if ((err = SrsMp4FullBox::decode_header(buf)) != srs_success) { + return srs_error_wrap(err, "decode header"); } - - // CTS/PTS box. - if (ctts && (err = ctts->initialize_counter()) != srs_success) { - return srs_error_wrap(err, "ctts init counter"); + + vector::iterator it; + for (it = entries.begin(); it != entries.end(); it++) + { + SrsMp4SampleEncryptionEntry* entry = *it; + srs_freep(entry); } + entries.clear(); - SrsMp4Sample* previous = NULL; - - // For each chunk offset. - for (uint32_t ci = 0; ci < stco->entry_count; ci++) { - // The sample offset relative in chunk. - uint32_t sample_relative_offset = 0; - - // Find how many samples from stsc. - SrsMp4StscEntry* stsc_entry = stsc->on_chunk(ci); - for (uint32_t i = 0; i < stsc_entry->samples_per_chunk; i++) { - SrsMp4Sample* sample = new SrsMp4Sample(); - sample->type = tt; - sample->index = (previous? previous->index+1:0); - sample->tbn = mdhd->timescale; - sample->offset = stco->entries[ci] + sample_relative_offset; - - uint32_t sample_size = 0; - if ((err = stsz->get_sample_size(sample->index, &sample_size)) != srs_success) { - srs_freep(sample); - return srs_error_wrap(err, "stsz get sample size"); - } - sample_relative_offset += sample_size; - - SrsMp4SttsEntry* stts_entry = NULL; - if ((err = stts->on_sample(sample->index, &stts_entry)) != srs_success) { - srs_freep(sample); - return srs_error_wrap(err, "stts on sample"); - } - if (previous) { - sample->pts = sample->dts = previous->dts + stts_entry->sample_delta; - } - - SrsMp4CttsEntry* ctts_entry = NULL; - if (ctts && (err = ctts->on_sample(sample->index, &ctts_entry)) != srs_success) { - srs_freep(sample); - return srs_error_wrap(err, "ctts on sample"); - } - if (ctts_entry) { - sample->pts = sample->dts + ctts_entry->sample_offset; - } - - if (tt == SrsFrameTypeVideo) { - if (!stss || stss->is_sync(sample->index)) { - sample->frame_type 
= SrsVideoAvcFrameTypeKeyFrame; - } else { - sample->frame_type = SrsVideoAvcFrameTypeInterFrame; - } - } - - // Only set the sample size, read data from io when needed. - sample->nb_data = sample_size; - sample->data = NULL; - - previous = sample; - tses[sample->offset] = sample; - } - } - - // Check total samples. - if (previous && previous->index + 1 != stsz->sample_count) { - return srs_error_new(ERROR_MP4_ILLEGAL_SAMPLES, "illegal samples count, expect=%d, actual=%d", stsz->sample_count, previous->index + 1); + int32_t size = buf->read_4bytes(); + for (int i = 0; i < size; i++) { + SrsMp4SampleEncryptionEntry *entry = new SrsMp4SampleEncryptionEntry(this, per_sample_iv_size_); + entry->decode(buf); + entries.push_back(entry); } - + return err; } -SrsMp4BoxReader::SrsMp4BoxReader() +std::stringstream& SrsMp4SampleEncryptionBox::dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc) { - rsio = NULL; - buf = new char[SRS_MP4_BUF_SIZE]; + ss << "sample_count=" << entries.size() << endl; + return ss; } -SrsMp4BoxReader::~SrsMp4BoxReader() +SrsMp4ProtectionSchemeInfoBox::SrsMp4ProtectionSchemeInfoBox() { - srs_freepa(buf); + type = SrsMp4BoxTypeSINF; } -srs_error_t SrsMp4BoxReader::initialize(ISrsReadSeeker* rs) +SrsMp4ProtectionSchemeInfoBox::~SrsMp4ProtectionSchemeInfoBox() { - rsio = rs; - - return srs_success; } -srs_error_t SrsMp4BoxReader::read(SrsSimpleStream* stream, SrsMp4Box** ppbox) +SrsMp4OriginalFormatBox* SrsMp4ProtectionSchemeInfoBox::frma() { - srs_error_t err = srs_success; - - SrsMp4Box* box = NULL; - // Note that we should use SrsAutoFree to free the ptr which is set later. - SrsAutoFree(SrsMp4Box, box); + SrsMp4Box* box = get(SrsMp4BoxTypeFRMA); + return dynamic_cast(box); +} - while (true) { - // For the first time to read the box, maybe it's a basic box which is only 4bytes header. - // When we disconvery the real box, we know the size of the whole box, then read again and decode it. - uint64_t required = box? 
box->sz():4; - - // For mdat box, we only requires to decode the header. - if (box && box->is_mdat()) { - required = box->sz_header(); - } - - // Fill the stream util we can discovery box. - while (stream->length() < (int)required) { - ssize_t nread; - if ((err = rsio->read(buf, SRS_MP4_BUF_SIZE, &nread)) != srs_success) { - return srs_error_wrap(err, "load failed, nread=%d, required=%d", (int)nread, (int)required); - } - - srs_assert(nread > 0); - stream->append(buf, (int)nread); - } +void SrsMp4ProtectionSchemeInfoBox::set_frma(SrsMp4OriginalFormatBox* v) +{ + remove(SrsMp4BoxTypeFRMA); + boxes.push_back(v); +} - SrsUniquePtr buffer(new SrsBuffer(stream->bytes(), stream->length())); +SrsMp4SchemeTypeBox* SrsMp4ProtectionSchemeInfoBox::schm() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSCHM); + return dynamic_cast(box); +} - // Discovery the box with basic header. - if (!box && (err = SrsMp4Box::discovery(buffer.get(), &box)) != srs_success) { - if (srs_error_code(err) == ERROR_MP4_BOX_REQUIRE_SPACE) { - srs_freep(err); - continue; - } - return srs_error_wrap(err, "load box failed"); - } - - // When box is discoveried, check whether we can demux the whole box. - // For mdat, only the header is required. - required = (box->is_mdat()? 
box->sz_header():box->sz()); - if (!buffer->require((int)required)) { - continue; - } - - if (err == srs_success) { - *ppbox = box; - box = NULL; - } - - break; - } +void SrsMp4ProtectionSchemeInfoBox::set_schm(SrsMp4SchemeTypeBox* v) +{ + remove(SrsMp4BoxTypeSCHM); + boxes.push_back(v); +} + +SrsMp4SchemeInfoBox* SrsMp4ProtectionSchemeInfoBox::schi() +{ + SrsMp4Box* box = get(SrsMp4BoxTypeSCHI); + return dynamic_cast(box); +} + +void SrsMp4ProtectionSchemeInfoBox::set_schi(SrsMp4SchemeInfoBox* v) +{ + remove(SrsMp4BoxTypeSCHI); + boxes.push_back(v); +} + + +SrsMp4OriginalFormatBox::SrsMp4OriginalFormatBox(uint32_t original_format) +{ + type = SrsMp4BoxTypeFRMA; + data_format_ = original_format; +} + +SrsMp4OriginalFormatBox::~SrsMp4OriginalFormatBox() +{ +} + +int SrsMp4OriginalFormatBox::nb_header() +{ + return SrsMp4Box::nb_header() + 4; +} + +srs_error_t SrsMp4OriginalFormatBox::encode_header(SrsBuffer* buf) +{ + srs_error_t err = srs_success; + if ((err = SrsMp4Box::encode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); + } + + buf->write_4bytes(data_format_); + return err; } -srs_error_t SrsMp4BoxReader::skip(SrsMp4Box* box, SrsSimpleStream* stream) +srs_error_t SrsMp4OriginalFormatBox::decode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - - // For mdat, always skip the content. - if (box->is_mdat()) { - int offset = (int)(box->sz() - stream->length()); - if (offset < 0) { - stream->erase(stream->length() + offset); - } else { - stream->erase(stream->length()); - } - if (offset > 0 && (err = rsio->lseek(offset, SEEK_CUR, NULL)) != srs_success) { - return srs_error_wrap(err, "io seek"); - } - } else { - // Remove the consumed bytes. 
- stream->erase((int)box->sz()); + + if ((err = SrsMp4Box::decode_header(buf)) != srs_success) { + return srs_error_wrap(err, "decode header"); } + + data_format_ = buf->read_4bytes(); return err; } -SrsMp4Decoder::SrsMp4Decoder() +std::stringstream& SrsMp4OriginalFormatBox::dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc) { - rsio = NULL; - brand = SrsMp4BoxBrandForbidden; - stream = new SrsSimpleStream(); - vcodec = SrsVideoCodecIdForbidden; - acodec = SrsAudioCodecIdForbidden; - asc_written = avcc_written = false; - sample_rate = SrsAudioSampleRateForbidden; - sound_bits = SrsAudioSampleBitsForbidden; - channels = SrsAudioChannelsForbidden; - samples = new SrsMp4SampleManager(); - br = new SrsMp4BoxReader(); - current_index = 0; - current_offset = 0; + ss << "original format=" << data_format_ << endl; + return ss; } -SrsMp4Decoder::~SrsMp4Decoder() +SrsMp4SchemeTypeBox::SrsMp4SchemeTypeBox() { - srs_freep(br); - srs_freep(stream); - srs_freep(samples); + type = SrsMp4BoxTypeSCHM; } -srs_error_t SrsMp4Decoder::initialize(ISrsReadSeeker* rs) +SrsMp4SchemeTypeBox::~SrsMp4SchemeTypeBox() { - srs_error_t err = srs_success; - - srs_assert(rs); - rsio = rs; - - if ((err = br->initialize(rs)) != srs_success) { - return srs_error_wrap(err, "init box reader"); +} + +void SrsMp4SchemeTypeBox::set_scheme_uri(char* uri, uint32_t uri_size) +{ + srs_assert(uri_size < SCHM_SCHEME_URI_MAX_SIZE); + memcpy(scheme_uri, uri, uri_size); + scheme_uri_size = uri_size; + scheme_uri[uri_size] = '\0'; +} + +int SrsMp4SchemeTypeBox::nb_header() +{ + int size = SrsMp4FullBox::nb_header() + 4 + 4; // sizeof(scheme_type) + sizeof(scheme_version) + + if (flags & 0x01) { + size += scheme_uri_size; } - // For mdat before moov, we must reset the offset to the mdat. 
- off_t offset = -1; - - while (true) { - SrsMp4Box* box_raw = NULL; - if ((err = load_next_box(&box_raw, 0)) != srs_success) { - return srs_error_wrap(err, "load box"); - } - SrsUniquePtr box(box_raw); + return size; +} + +srs_error_t SrsMp4SchemeTypeBox::encode_header(SrsBuffer* buf) +{ + srs_error_t err = srs_success; - if (box->is_ftyp()) { - SrsMp4FileTypeBox* ftyp = dynamic_cast(box.get()); - if ((err = parse_ftyp(ftyp)) != srs_success) { - return srs_error_wrap(err, "parse ftyp"); - } - } else if (box->is_mdat()) { - off_t cur = 0; - if ((err = rsio->lseek(0, SEEK_CUR, &cur)) != srs_success) { - return srs_error_wrap(err, "io seek"); - } - offset = off_t(cur - box->sz()); - } else if (box->is_moov()) { - SrsMp4MovieBox* moov = dynamic_cast(box.get()); - if ((err = parse_moov(moov)) != srs_success) { - return srs_error_wrap(err, "parse moov"); - } - break; - } + if ((err = SrsMp4FullBox::encode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); } - - if (brand == SrsMp4BoxBrandForbidden) { - return srs_error_new(ERROR_MP4_BOX_ILLEGAL_SCHEMA, "missing ftyp"); + + buf->write_4bytes(scheme_type); + buf->write_4bytes(scheme_version); + + if (flags & 0x01) { + buf->write_bytes(scheme_uri, scheme_uri_size); + buf->write_1bytes(0); } - - // Set the offset to the mdat. 
- if (offset >= 0) { - if ((err = rsio->lseek(offset, SEEK_SET, &current_offset)) != srs_success) { - return srs_error_wrap(err, "seek to mdat"); + + return err; +} + +srs_error_t SrsMp4SchemeTypeBox::decode_header(SrsBuffer* buf) +{ + srs_error_t err = srs_success; + + if ((err = SrsMp4FullBox::decode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); + } + scheme_type = buf->read_4bytes(); + scheme_version = buf->read_4bytes(); + + if (flags & 0x01) { + memset(scheme_uri, 0, SCHM_SCHEME_URI_MAX_SIZE); + int s = 0; + while (s < SCHM_SCHEME_URI_MAX_SIZE-1) { + char c = buf->read_1bytes(); + scheme_uri[s] = c; + s++; + if (c == '\0') { + break; + } } + scheme_uri_size = s; } return err; } -srs_error_t SrsMp4Decoder::read_sample(SrsMp4HandlerType* pht, uint16_t* pft, uint16_t* pct, uint32_t* pdts, uint32_t* ppts, uint8_t** psample, uint32_t* pnb_sample) +std::stringstream& SrsMp4SchemeTypeBox::dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc) { - srs_error_t err = srs_success; - - if (!avcc_written && !pavcc.empty()) { - avcc_written = true; - *pdts = *ppts = 0; - *pht = SrsMp4HandlerTypeVIDE; - - uint32_t nb_sample = *pnb_sample = (uint32_t)pavcc.size(); - uint8_t* sample = *psample = new uint8_t[nb_sample]; - memcpy(sample, &pavcc[0], nb_sample); - - *pft = SrsVideoAvcFrameTypeKeyFrame; - *pct = SrsVideoAvcFrameTraitSequenceHeader; - - return err; + ss << "scheme_type=" << scheme_type << ", scheme_version=" << scheme_version << endl; + if (flags & 0x01) { + ss << "scheme_uri=" << scheme_uri << endl; } - - if (!asc_written && !pasc.empty()) { - asc_written = true; - *pdts = *ppts = 0; - *pht = SrsMp4HandlerTypeSOUN; - - uint32_t nb_sample = *pnb_sample = (uint32_t)pasc.size(); - uint8_t* sample = *psample = new uint8_t[nb_sample]; - memcpy(sample, &pasc[0], nb_sample); - - *pft = 0x00; - *pct = SrsAudioAacFrameTraitSequenceHeader; - - return err; + + return ss; +} + +SrsMp4SchemeInfoBox::SrsMp4SchemeInfoBox() +{ + type = 
SrsMp4BoxTypeSCHI; +} + +SrsMp4SchemeInfoBox::~SrsMp4SchemeInfoBox() +{ +} + +SrsMp4TrackEncryptionBox::SrsMp4TrackEncryptionBox() +{ + type = SrsMp4BoxTypeTENC; +} + +SrsMp4TrackEncryptionBox::~SrsMp4TrackEncryptionBox() +{ +} + +void SrsMp4TrackEncryptionBox::set_default_constant_IV(uint8_t* iv, uint8_t iv_size) +{ + srs_assert(iv_size == 8 || iv_size == 16); + memcpy(default_constant_IV, iv, iv_size); + default_constant_IV_size = iv_size; +} + +int SrsMp4TrackEncryptionBox::nb_header() +{ + int size = SrsMp4FullBox::nb_header(); + size += 1; // sizeof(reserved) + size += 1; // sizeof(reserved_2) or sizeof(default_crypt_byte_block) + sizeof(default_skip_byte_block); + size += 1; // sizeof(default_isProtected); + size += 1; // sizeof(default_Per_Sample_IV_Size; + size += 16; // sizeof(default_KID); + if (default_is_protected == 1 && default_per_sample_IV_size == 0) { + size += 1 + default_constant_IV_size; // sizeof(default_constant_IV_size) + sizeof(default_constant_IV); } - - SrsMp4Sample* ps = samples->at(current_index++); - if (!ps) { - return srs_error_new(ERROR_SYSTEM_FILE_EOF, "EOF"); + + return size; +} + +srs_error_t SrsMp4TrackEncryptionBox::encode_header(SrsBuffer* buf) +{ + srs_error_t err = srs_success; + + if ((err = SrsMp4FullBox::encode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); } - - if (ps->type == SrsFrameTypeVideo) { - *pht = SrsMp4HandlerTypeVIDE; - *pct = SrsVideoAvcFrameTraitNALU; + + buf->write_1bytes(reserved); + if (version == 0) { + buf->write_1bytes(reserved_2); } else { - *pht = SrsMp4HandlerTypeSOUN; - *pct = SrsAudioAacFrameTraitRawData; - } - *pdts = ps->dts_ms(); - *ppts = ps->pts_ms(); - *pft = ps->frame_type; - - // Read sample from io, for we never preload the samples(too large). 
- if (ps->offset != current_offset) { - if ((err = rsio->lseek(ps->offset, SEEK_SET, &current_offset)) != srs_success) { - return srs_error_wrap(err, "seek to sample"); - } + buf->write_1bytes( (default_crypt_byte_block << 4) | (default_skip_byte_block & 0x0F)); } - - uint32_t nb_sample = ps->nb_data; - uint8_t* sample = new uint8_t[nb_sample]; - // TODO: FIXME: Use fully read. - if ((err = rsio->read(sample, nb_sample, NULL)) != srs_success) { - srs_freepa(sample); - return srs_error_wrap(err, "read sample"); + + buf->write_1bytes(default_is_protected); + buf->write_1bytes(default_per_sample_IV_size); + buf->write_bytes((char*)default_KID, 16); + if (default_is_protected == 1 && default_per_sample_IV_size == 0) { + buf->write_1bytes(default_constant_IV_size); + buf->write_bytes((char*)default_constant_IV, default_constant_IV_size); } - - *psample = sample; - *pnb_sample = nb_sample; - current_offset += nb_sample; - + return err; } -srs_error_t SrsMp4Decoder::parse_ftyp(SrsMp4FileTypeBox* ftyp) +srs_error_t SrsMp4TrackEncryptionBox::decode_header(SrsBuffer* buf) { srs_error_t err = srs_success; - - // File Type Box (ftyp) - bool legal_brand = false; - static SrsMp4BoxBrand legal_brands[] = { - SrsMp4BoxBrandISOM, SrsMp4BoxBrandISO2, SrsMp4BoxBrandAVC1, SrsMp4BoxBrandMP41, - SrsMp4BoxBrandISO5 - }; - for (int i = 0; i < (int)(sizeof(legal_brands)/sizeof(SrsMp4BoxBrand)); i++) { - if (ftyp->major_brand == legal_brands[i]) { - legal_brand = true; - break; - } + + if ((err = SrsMp4FullBox::decode_header(buf)) != srs_success) { + return srs_error_wrap(err, "encode header"); } - if (!legal_brand) { - return srs_error_new(ERROR_MP4_BOX_ILLEGAL_BRAND, "brand is illegal, brand=%d", ftyp->major_brand); + reserved = buf->read_1bytes(); + if (version == 0) { + reserved_2 = buf->read_1bytes(); + } else { + uint8_t v = buf->read_1bytes(); + default_crypt_byte_block = v >> 4; + default_skip_byte_block = v & 0x0f; } - - brand = ftyp->major_brand; - + + default_is_protected = 
buf->read_1bytes(); + default_per_sample_IV_size = buf->read_1bytes(); + buf->read_bytes((char*)default_KID, 16); + + if (default_is_protected == 1 && default_per_sample_IV_size == 0) { + default_constant_IV_size = buf->read_1bytes(); + srs_assert(default_constant_IV_size == 8 || default_constant_IV_size == 16); + buf->read_bytes((char*) default_constant_IV, default_constant_IV_size); + } + return err; } -srs_error_t SrsMp4Decoder::parse_moov(SrsMp4MovieBox* moov) +std::stringstream& SrsMp4TrackEncryptionBox::dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc) { - srs_error_t err = srs_success; - - SrsMp4MovieHeaderBox* mvhd = moov->mvhd(); - if (!mvhd) { - return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing mvhd"); + if (version != 0) { + ss << "default_crypt_byte_block=" << default_crypt_byte_block << ", default_skip_byte_block=" << default_skip_byte_block << endl; } + ss << "default_isProtected=" << default_is_protected << ", default_per_sample_IV_size=" << default_per_sample_IV_size << endl; - SrsMp4TrackBox* vide = moov->video(); - SrsMp4TrackBox* soun = moov->audio(); - if (!vide && !soun) { - return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing audio and video track"); + return ss; +} + +SrsMp4Sample::SrsMp4Sample() +{ + type = SrsFrameTypeForbidden; + offset = 0; + index = 0; + dts = pts = 0; + nb_data = 0; + data = NULL; + frame_type = SrsVideoAvcFrameTypeForbidden; + tbn = 0; + adjust = 0; +} + +SrsMp4Sample::~SrsMp4Sample() +{ + srs_freepa(data); +} + +uint32_t SrsMp4Sample::dts_ms() +{ + return (uint32_t)(dts * 1000 / tbn) + adjust; +} + +uint32_t SrsMp4Sample::pts_ms() +{ + return (uint32_t)(pts * 1000 / tbn) + adjust; +} + +SrsMp4SampleManager::SrsMp4SampleManager() +{ +} + +SrsMp4SampleManager::~SrsMp4SampleManager() +{ + vector::iterator it; + for (it = samples.begin(); it != samples.end(); ++it) { + SrsMp4Sample* sample = *it; + srs_freep(sample); } + samples.clear(); +} + +srs_error_t SrsMp4SampleManager::load(SrsMp4MovieBox* moov) +{ + 
srs_error_t err = srs_success; - SrsMp4AudioSampleEntry* mp4a = soun? soun->mp4a():NULL; - if (mp4a) { - uint32_t sr = mp4a->samplerate>>16; - if ((sample_rate = srs_audio_sample_rate_from_number(sr)) == SrsAudioSampleRateForbidden) { - sample_rate = srs_audio_sample_rate_guess_number(sr); - } - - if (mp4a->samplesize == 16) { - sound_bits = SrsAudioSampleBits16bit; - } else { - sound_bits = SrsAudioSampleBits8bit; + map tses; + + // Load samples from moov, merge to temp samples. + if ((err = do_load(tses, moov)) != srs_success) { + map::iterator it; + for (it = tses.begin(); it != tses.end(); ++it) { + SrsMp4Sample* sample = it->second; + srs_freep(sample); } - if (mp4a->channelcount == 2) { - channels = SrsAudioChannelsStereo; - } else { - channels = SrsAudioChannelsMono; - } + return srs_error_wrap(err, "load mp4"); } - SrsMp4AvccBox* avcc = vide? vide->avcc():NULL; - SrsMp4DecoderSpecificInfo* asc = soun? soun->asc():NULL; - if (vide && !avcc) { - return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing video sequence header"); - } - if (soun && !asc && soun->soun_codec() == SrsAudioCodecIdAAC) { - return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing audio sequence header"); + // Dumps temp samples. + // Adjust the sequence diff. 
+ int32_t maxp = 0; + int32_t maxn = 0; + if (true) { + SrsMp4Sample* pvideo = NULL; + map::iterator it; + for (it = tses.begin(); it != tses.end(); ++it) { + SrsMp4Sample* sample = it->second; + samples.push_back(sample); + + if (sample->type == SrsFrameTypeVideo) { + pvideo = sample; + } else if (pvideo) { + int32_t diff = sample->dts_ms() - pvideo->dts_ms(); + if (diff > 0) { + maxp = srs_max(maxp, diff); + } else { + maxn = srs_min(maxn, diff); + } + pvideo = NULL; + } + } } - vcodec = vide?vide->vide_codec():SrsVideoCodecIdForbidden; - acodec = soun?soun->soun_codec():SrsAudioCodecIdForbidden; - - if (avcc && !avcc->avc_config.empty()) { - pavcc = avcc->avc_config; - } - if (asc && !asc->asc.empty()) { - pasc = asc->asc; + // Adjust when one of maxp and maxn is zero, + // that means we can adjust by add maxn or sub maxp, + // notice that maxn is negative and maxp is positive. + if (maxp * maxn == 0 && maxp + maxn != 0) { + map::iterator it; + for (it = tses.begin(); it != tses.end(); ++it) { + SrsMp4Sample* sample = it->second; + if (sample->type == SrsFrameTypeAudio) { + sample->adjust = 0 - maxp - maxn; + } + } } - // Build the samples structure from moov. 
- if ((err = samples->load(moov)) != srs_success) { - return srs_error_wrap(err, "load samples"); + return err; +} + +SrsMp4Sample* SrsMp4SampleManager::at(uint32_t index) +{ + if (index < samples.size()) { + return samples.at(index); } - - stringstream ss; + return NULL; +} + +void SrsMp4SampleManager::append(SrsMp4Sample* sample) +{ + samples.push_back(sample); +} + +srs_error_t SrsMp4SampleManager::write(SrsMp4MovieBox* moov) +{ + srs_error_t err = srs_success; + + SrsMp4TrackBox* vide = moov->video(); + if (vide) { + bool has_cts = false; + vector::iterator it; + for (it = samples.begin(); it != samples.end(); ++it) { + SrsMp4Sample* sample = *it; + if (sample->dts != sample->pts && sample->type == SrsFrameTypeVideo) { + has_cts = true; + break; + } + } + + SrsMp4SampleTableBox* stbl = vide->stbl(); + + SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); + stbl->set_stts(stts); + + SrsMp4SyncSampleBox* stss = new SrsMp4SyncSampleBox(); + stbl->set_stss(stss); + + SrsMp4CompositionTime2SampleBox* ctts = NULL; + if (has_cts) { + ctts = new SrsMp4CompositionTime2SampleBox(); + stbl->set_ctts(ctts); + } + + SrsMp4Sample2ChunkBox* stsc = new SrsMp4Sample2ChunkBox(); + stbl->set_stsc(stsc); + + SrsMp4SampleSizeBox* stsz = new SrsMp4SampleSizeBox(); + stbl->set_stsz(stsz); + + SrsMp4FullBox* co = NULL; + // When sample offset less than UINT32_MAX, we use stco(support 32bit offset) box to save storage space. + if (samples.empty() || (*samples.rbegin())->offset < UINT32_MAX) { + // stco support 32bit offset. + co = new SrsMp4ChunkOffsetBox(); + stbl->set_stco(static_cast(co)); + } else { + // When sample offset bigger than UINT32_MAX, we use co64(support 64bit offset) box to avoid overflow. 
+ co = new SrsMp4ChunkLargeOffsetBox(); + stbl->set_co64(static_cast(co)); + } + + if ((err = write_track(SrsFrameTypeVideo, stts, stss, ctts, stsc, stsz, co)) != srs_success) { + return srs_error_wrap(err, "write vide track"); + } + } + + SrsMp4TrackBox* soun = moov->audio(); + if (soun) { + SrsMp4SampleTableBox* stbl = soun->stbl(); + + SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); + stbl->set_stts(stts); + + SrsMp4SyncSampleBox* stss = NULL; + SrsMp4CompositionTime2SampleBox* ctts = NULL; + + SrsMp4Sample2ChunkBox* stsc = new SrsMp4Sample2ChunkBox(); + stbl->set_stsc(stsc); + + SrsMp4SampleSizeBox* stsz = new SrsMp4SampleSizeBox(); + stbl->set_stsz(stsz); + + SrsMp4FullBox* co = NULL; + if (samples.empty() || (*samples.rbegin())->offset < UINT32_MAX) { + co = new SrsMp4ChunkOffsetBox(); + stbl->set_stco(static_cast(co)); + } else { + co = new SrsMp4ChunkLargeOffsetBox(); + stbl->set_co64(static_cast(co)); + } + + if ((err = write_track(SrsFrameTypeAudio, stts, stss, ctts, stsc, stsz, co)) != srs_success) { + return srs_error_wrap(err, "write soun track"); + } + } + + return err; +} + +srs_error_t SrsMp4SampleManager::write(SrsMp4TrackFragmentBox* traf, uint64_t dts) +{ + srs_error_t err = srs_success; + + SrsMp4TrackFragmentRunBox* trun = traf->trun(); + trun->flags = SrsMp4TrunFlagsDataOffset | SrsMp4TrunFlagsSampleDuration + | SrsMp4TrunFlagsSampleSize | SrsMp4TrunFlagsSampleFlag | SrsMp4TrunFlagsSampleCtsOffset; + + SrsMp4Sample* previous = NULL; + + vector::iterator it; + for (it = samples.begin(); it != samples.end(); ++it) { + SrsMp4Sample* sample = *it; + SrsMp4TrunEntry* entry = new SrsMp4TrunEntry(trun); + + if (!previous) { + previous = sample; + entry->sample_flags = 0x02000000; + } else { + entry->sample_flags = 0x01000000; + } + + vector::iterator iter = (it + 1); + if (iter == samples.end()) { + entry->sample_duration = dts - sample->dts; + } else { + entry->sample_duration = (*iter)->dts - sample->dts; + } + + 
entry->sample_size = sample->nb_data; + entry->sample_composition_time_offset = (int64_t)(sample->pts - sample->dts); + if (entry->sample_composition_time_offset < 0) { + trun->version = 1; + } + + trun->entries.push_back(entry); + } + + return err; +} + +srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track, + SrsMp4DecodingTime2SampleBox* stts, SrsMp4SyncSampleBox* stss, SrsMp4CompositionTime2SampleBox* ctts, + SrsMp4Sample2ChunkBox* stsc, SrsMp4SampleSizeBox* stsz, SrsMp4FullBox* co) +{ + srs_error_t err = srs_success; + + SrsMp4SttsEntry stts_entry; + vector stts_entries; + + SrsMp4CttsEntry ctts_entry; + vector ctts_entries; + + vector stsz_entries; + vector co_entries; + vector stss_entries; + + SrsMp4Sample* previous = NULL; + vector::iterator it; + for (it = samples.begin(); it != samples.end(); ++it) { + SrsMp4Sample* sample = *it; + if (sample->type != track) { + continue; + } + + stsz_entries.push_back(sample->nb_data); + co_entries.push_back((uint64_t)sample->offset); + + if (sample->frame_type == SrsVideoAvcFrameTypeKeyFrame) { + stss_entries.push_back(sample->index + 1); + } + + if (stts) { + if (previous) { + uint32_t delta = (uint32_t)(sample->dts - previous->dts); + if (stts_entry.sample_delta == 0 || stts_entry.sample_delta == delta) { + stts_entry.sample_delta = delta; + stts_entry.sample_count++; + } else { + stts_entries.push_back(stts_entry); + stts_entry.sample_count = 1; + stts_entry.sample_delta = delta; + } + } else { + // The first sample always in the STTS table. 
+ stts_entry.sample_count++; + } + } + + if (ctts) { + int64_t offset = sample->pts - sample->dts; + if (offset < 0) { + ctts->version = 0x01; + } + if (ctts_entry.sample_count == 0 || ctts_entry.sample_offset == offset) { + ctts_entry.sample_count++; + } else { + ctts_entries.push_back(ctts_entry); + ctts_entry.sample_offset = offset; + ctts_entry.sample_count = 1; + } + } + + previous = sample; + } + + if (stts && stts_entry.sample_count) { + stts_entries.push_back(stts_entry); + } + + if (ctts && ctts_entry.sample_count) { + ctts_entries.push_back(ctts_entry); + } + + if (stts && !stts_entries.empty()) { + stts->entries = stts_entries; + } + + if (ctts && !ctts_entries.empty()) { + ctts->entries = ctts_entries; + } + + if (stsc) { + stsc->entry_count = 1; + stsc->entries = new SrsMp4StscEntry[1]; + + SrsMp4StscEntry& v = stsc->entries[0]; + v.first_chunk = v.sample_description_index = v.samples_per_chunk = 1; + } + + if (stsz && !stsz_entries.empty()) { + stsz->sample_size = 0; + stsz->sample_count = (uint32_t)stsz_entries.size(); + stsz->entry_sizes = new uint32_t[stsz->sample_count]; + for (int i = 0; i < (int)stsz->sample_count; i++) { + stsz->entry_sizes[i] = stsz_entries.at(i); + } + } + + if (!co_entries.empty()) { + SrsMp4ChunkOffsetBox* stco = dynamic_cast(co); + SrsMp4ChunkLargeOffsetBox* co64 = dynamic_cast(co); + + if (stco) { + stco->entry_count = (uint32_t)co_entries.size(); + stco->entries = new uint32_t[stco->entry_count]; + for (int i = 0; i < (int)stco->entry_count; i++) { + stco->entries[i] = co_entries.at(i); + } + } else if (co64) { + co64->entry_count = (uint32_t)co_entries.size(); + co64->entries = new uint64_t[co64->entry_count]; + for (int i = 0; i < (int)co64->entry_count; i++) { + co64->entries[i] = co_entries.at(i); + } + } + } + + if (stss && !stss_entries.empty()) { + stss->entry_count = (uint32_t)stss_entries.size(); + stss->sample_numbers = new uint32_t[stss->entry_count]; + for (int i = 0; i < (int)stss->entry_count; i++) { + 
stss->sample_numbers[i] = stss_entries.at(i); + } + } + + return err; +} + +srs_error_t SrsMp4SampleManager::do_load(map& tses, SrsMp4MovieBox* moov) +{ + srs_error_t err = srs_success; + + SrsMp4TrackBox* vide = moov->video(); + if (vide) { + SrsMp4MediaHeaderBox* mdhd = vide->mdhd(); + SrsMp4TrackType tt = vide->track_type(); + SrsMp4ChunkOffsetBox* stco = vide->stco(); + SrsMp4SampleSizeBox* stsz = vide->stsz(); + SrsMp4Sample2ChunkBox* stsc = vide->stsc(); + SrsMp4DecodingTime2SampleBox* stts = vide->stts(); + // The composition time to sample table is optional and must only be present if DT and CT differ for any samples. + SrsMp4CompositionTime2SampleBox* ctts = vide->ctts(); + // If the sync sample box is not present, every sample is a sync sample. + SrsMp4SyncSampleBox* stss = vide->stss(); + + if (!mdhd || !stco || !stsz || !stsc || !stts) { + return srs_error_new(ERROR_MP4_ILLEGAL_TRACK, "illegal track, empty mdhd/stco/stsz/stsc/stts, type=%d", tt); + } + + if ((err = load_trak(tses, SrsFrameTypeVideo, mdhd, stco, stsz, stsc, stts, ctts, stss)) != srs_success) { + return srs_error_wrap(err, "load vide track"); + } + } + + SrsMp4TrackBox* soun = moov->audio(); + if (soun) { + SrsMp4MediaHeaderBox* mdhd = soun->mdhd(); + SrsMp4TrackType tt = soun->track_type(); + SrsMp4ChunkOffsetBox* stco = soun->stco(); + SrsMp4SampleSizeBox* stsz = soun->stsz(); + SrsMp4Sample2ChunkBox* stsc = soun->stsc(); + SrsMp4DecodingTime2SampleBox* stts = soun->stts(); + + if (!mdhd || !stco || !stsz || !stsc || !stts) { + return srs_error_new(ERROR_MP4_ILLEGAL_TRACK, "illegal track, empty mdhd/stco/stsz/stsc/stts, type=%d", tt); + } + + if ((err = load_trak(tses, SrsFrameTypeAudio, mdhd, stco, stsz, stsc, stts, NULL, NULL)) != srs_success) { + return srs_error_wrap(err, "load soun track"); + } + } + + return err; +} + +srs_error_t SrsMp4SampleManager::load_trak(map& tses, SrsFrameType tt, + SrsMp4MediaHeaderBox* mdhd, SrsMp4ChunkOffsetBox* stco, SrsMp4SampleSizeBox* stsz, 
SrsMp4Sample2ChunkBox* stsc, + SrsMp4DecodingTime2SampleBox* stts, SrsMp4CompositionTime2SampleBox* ctts, SrsMp4SyncSampleBox* stss) +{ + srs_error_t err = srs_success; + + // Samples per chunk. + stsc->initialize_counter(); + + // DTS box. + if ((err = stts->initialize_counter()) != srs_success) { + return srs_error_wrap(err, "stts init counter"); + } + + // CTS/PTS box. + if (ctts && (err = ctts->initialize_counter()) != srs_success) { + return srs_error_wrap(err, "ctts init counter"); + } + + SrsMp4Sample* previous = NULL; + + // For each chunk offset. + for (uint32_t ci = 0; ci < stco->entry_count; ci++) { + // The sample offset relative in chunk. + uint32_t sample_relative_offset = 0; + + // Find how many samples from stsc. + SrsMp4StscEntry* stsc_entry = stsc->on_chunk(ci); + for (uint32_t i = 0; i < stsc_entry->samples_per_chunk; i++) { + SrsMp4Sample* sample = new SrsMp4Sample(); + sample->type = tt; + sample->index = (previous? previous->index+1:0); + sample->tbn = mdhd->timescale; + sample->offset = stco->entries[ci] + sample_relative_offset; + + uint32_t sample_size = 0; + if ((err = stsz->get_sample_size(sample->index, &sample_size)) != srs_success) { + srs_freep(sample); + return srs_error_wrap(err, "stsz get sample size"); + } + sample_relative_offset += sample_size; + + SrsMp4SttsEntry* stts_entry = NULL; + if ((err = stts->on_sample(sample->index, &stts_entry)) != srs_success) { + srs_freep(sample); + return srs_error_wrap(err, "stts on sample"); + } + if (previous) { + sample->pts = sample->dts = previous->dts + stts_entry->sample_delta; + } + + SrsMp4CttsEntry* ctts_entry = NULL; + if (ctts && (err = ctts->on_sample(sample->index, &ctts_entry)) != srs_success) { + srs_freep(sample); + return srs_error_wrap(err, "ctts on sample"); + } + if (ctts_entry) { + sample->pts = sample->dts + ctts_entry->sample_offset; + } + + if (tt == SrsFrameTypeVideo) { + if (!stss || stss->is_sync(sample->index)) { + sample->frame_type = SrsVideoAvcFrameTypeKeyFrame; + 
} else { + sample->frame_type = SrsVideoAvcFrameTypeInterFrame; + } + } + + // Only set the sample size, read data from io when needed. + sample->nb_data = sample_size; + sample->data = NULL; + + previous = sample; + tses[sample->offset] = sample; + } + } + + // Check total samples. + if (previous && previous->index + 1 != stsz->sample_count) { + return srs_error_new(ERROR_MP4_ILLEGAL_SAMPLES, "illegal samples count, expect=%d, actual=%d", stsz->sample_count, previous->index + 1); + } + + return err; +} + +SrsMp4BoxReader::SrsMp4BoxReader() +{ + rsio = NULL; + buf = new char[SRS_MP4_BUF_SIZE]; +} + +SrsMp4BoxReader::~SrsMp4BoxReader() +{ + srs_freepa(buf); +} + +srs_error_t SrsMp4BoxReader::initialize(ISrsReadSeeker* rs) +{ + rsio = rs; + + return srs_success; +} + +srs_error_t SrsMp4BoxReader::read(SrsSimpleStream* stream, SrsMp4Box** ppbox) +{ + srs_error_t err = srs_success; + + SrsMp4Box* box = NULL; + // Note that we should use SrsAutoFree to free the ptr which is set later. + SrsAutoFree(SrsMp4Box, box); + + while (true) { + // For the first time to read the box, maybe it's a basic box which is only 4bytes header. + // When we disconvery the real box, we know the size of the whole box, then read again and decode it. + uint64_t required = box? box->sz():4; + + // For mdat box, we only requires to decode the header. + if (box && box->is_mdat()) { + required = box->sz_header(); + } + + // Fill the stream util we can discovery box. + while (stream->length() < (int)required) { + ssize_t nread; + if ((err = rsio->read(buf, SRS_MP4_BUF_SIZE, &nread)) != srs_success) { + return srs_error_wrap(err, "load failed, nread=%d, required=%d", (int)nread, (int)required); + } + + srs_assert(nread > 0); + stream->append(buf, (int)nread); + } + + SrsUniquePtr buffer(new SrsBuffer(stream->bytes(), stream->length())); + + // Discovery the box with basic header. 
+ if (!box && (err = SrsMp4Box::discovery(buffer.get(), &box)) != srs_success) { + if (srs_error_code(err) == ERROR_MP4_BOX_REQUIRE_SPACE) { + srs_freep(err); + continue; + } + return srs_error_wrap(err, "load box failed"); + } + + // When box is discoveried, check whether we can demux the whole box. + // For mdat, only the header is required. + required = (box->is_mdat()? box->sz_header():box->sz()); + if (!buffer->require((int)required)) { + continue; + } + + if (err == srs_success) { + *ppbox = box; + box = NULL; + } + + break; + } + + return err; +} + +srs_error_t SrsMp4BoxReader::skip(SrsMp4Box* box, SrsSimpleStream* stream) +{ + srs_error_t err = srs_success; + + // For mdat, always skip the content. + if (box->is_mdat()) { + int offset = (int)(box->sz() - stream->length()); + if (offset < 0) { + stream->erase(stream->length() + offset); + } else { + stream->erase(stream->length()); + } + if (offset > 0 && (err = rsio->lseek(offset, SEEK_CUR, NULL)) != srs_success) { + return srs_error_wrap(err, "io seek"); + } + } else { + // Remove the consumed bytes. 
+ stream->erase((int)box->sz()); + } + + return err; +} + +SrsMp4Decoder::SrsMp4Decoder() +{ + rsio = NULL; + brand = SrsMp4BoxBrandForbidden; + stream = new SrsSimpleStream(); + vcodec = SrsVideoCodecIdForbidden; + acodec = SrsAudioCodecIdForbidden; + asc_written = avcc_written = false; + sample_rate = SrsAudioSampleRateForbidden; + sound_bits = SrsAudioSampleBitsForbidden; + channels = SrsAudioChannelsForbidden; + samples = new SrsMp4SampleManager(); + br = new SrsMp4BoxReader(); + current_index = 0; + current_offset = 0; +} + +SrsMp4Decoder::~SrsMp4Decoder() +{ + srs_freep(br); + srs_freep(stream); + srs_freep(samples); +} + +srs_error_t SrsMp4Decoder::initialize(ISrsReadSeeker* rs) +{ + srs_error_t err = srs_success; + + srs_assert(rs); + rsio = rs; + + if ((err = br->initialize(rs)) != srs_success) { + return srs_error_wrap(err, "init box reader"); + } + + // For mdat before moov, we must reset the offset to the mdat. + off_t offset = -1; + + while (true) { + SrsMp4Box* box_raw = NULL; + if ((err = load_next_box(&box_raw, 0)) != srs_success) { + return srs_error_wrap(err, "load box"); + } + SrsUniquePtr box(box_raw); + + if (box->is_ftyp()) { + SrsMp4FileTypeBox* ftyp = dynamic_cast(box.get()); + if ((err = parse_ftyp(ftyp)) != srs_success) { + return srs_error_wrap(err, "parse ftyp"); + } + } else if (box->is_mdat()) { + off_t cur = 0; + if ((err = rsio->lseek(0, SEEK_CUR, &cur)) != srs_success) { + return srs_error_wrap(err, "io seek"); + } + offset = off_t(cur - box->sz()); + } else if (box->is_moov()) { + SrsMp4MovieBox* moov = dynamic_cast(box.get()); + if ((err = parse_moov(moov)) != srs_success) { + return srs_error_wrap(err, "parse moov"); + } + break; + } + } + + if (brand == SrsMp4BoxBrandForbidden) { + return srs_error_new(ERROR_MP4_BOX_ILLEGAL_SCHEMA, "missing ftyp"); + } + + // Set the offset to the mdat. 
+ if (offset >= 0) { + if ((err = rsio->lseek(offset, SEEK_SET, ¤t_offset)) != srs_success) { + return srs_error_wrap(err, "seek to mdat"); + } + } + + return err; +} + +srs_error_t SrsMp4Decoder::read_sample(SrsMp4HandlerType* pht, uint16_t* pft, uint16_t* pct, uint32_t* pdts, uint32_t* ppts, uint8_t** psample, uint32_t* pnb_sample) +{ + srs_error_t err = srs_success; + + if (!avcc_written && !pavcc.empty()) { + avcc_written = true; + *pdts = *ppts = 0; + *pht = SrsMp4HandlerTypeVIDE; + + uint32_t nb_sample = *pnb_sample = (uint32_t)pavcc.size(); + uint8_t* sample = *psample = new uint8_t[nb_sample]; + memcpy(sample, &pavcc[0], nb_sample); + + *pft = SrsVideoAvcFrameTypeKeyFrame; + *pct = SrsVideoAvcFrameTraitSequenceHeader; + + return err; + } + + if (!asc_written && !pasc.empty()) { + asc_written = true; + *pdts = *ppts = 0; + *pht = SrsMp4HandlerTypeSOUN; + + uint32_t nb_sample = *pnb_sample = (uint32_t)pasc.size(); + uint8_t* sample = *psample = new uint8_t[nb_sample]; + memcpy(sample, &pasc[0], nb_sample); + + *pft = 0x00; + *pct = SrsAudioAacFrameTraitSequenceHeader; + + return err; + } + + SrsMp4Sample* ps = samples->at(current_index++); + if (!ps) { + return srs_error_new(ERROR_SYSTEM_FILE_EOF, "EOF"); + } + + if (ps->type == SrsFrameTypeVideo) { + *pht = SrsMp4HandlerTypeVIDE; + *pct = SrsVideoAvcFrameTraitNALU; + } else { + *pht = SrsMp4HandlerTypeSOUN; + *pct = SrsAudioAacFrameTraitRawData; + } + *pdts = ps->dts_ms(); + *ppts = ps->pts_ms(); + *pft = ps->frame_type; + + // Read sample from io, for we never preload the samples(too large). + if (ps->offset != current_offset) { + if ((err = rsio->lseek(ps->offset, SEEK_SET, ¤t_offset)) != srs_success) { + return srs_error_wrap(err, "seek to sample"); + } + } + + uint32_t nb_sample = ps->nb_data; + uint8_t* sample = new uint8_t[nb_sample]; + // TODO: FIXME: Use fully read. 
+ if ((err = rsio->read(sample, nb_sample, NULL)) != srs_success) { + srs_freepa(sample); + return srs_error_wrap(err, "read sample"); + } + + *psample = sample; + *pnb_sample = nb_sample; + current_offset += nb_sample; + + return err; +} + +srs_error_t SrsMp4Decoder::parse_ftyp(SrsMp4FileTypeBox* ftyp) +{ + srs_error_t err = srs_success; + + // File Type Box (ftyp) + bool legal_brand = false; + static SrsMp4BoxBrand legal_brands[] = { + SrsMp4BoxBrandISOM, SrsMp4BoxBrandISO2, SrsMp4BoxBrandAVC1, SrsMp4BoxBrandMP41, + SrsMp4BoxBrandISO5 + }; + for (int i = 0; i < (int)(sizeof(legal_brands)/sizeof(SrsMp4BoxBrand)); i++) { + if (ftyp->major_brand == legal_brands[i]) { + legal_brand = true; + break; + } + } + if (!legal_brand) { + return srs_error_new(ERROR_MP4_BOX_ILLEGAL_BRAND, "brand is illegal, brand=%d", ftyp->major_brand); + } + + brand = ftyp->major_brand; + + return err; +} + +srs_error_t SrsMp4Decoder::parse_moov(SrsMp4MovieBox* moov) +{ + srs_error_t err = srs_success; + + SrsMp4MovieHeaderBox* mvhd = moov->mvhd(); + if (!mvhd) { + return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing mvhd"); + } + + SrsMp4TrackBox* vide = moov->video(); + SrsMp4TrackBox* soun = moov->audio(); + if (!vide && !soun) { + return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing audio and video track"); + } + + SrsMp4AudioSampleEntry* mp4a = soun? soun->mp4a():NULL; + if (mp4a) { + uint32_t sr = mp4a->samplerate>>16; + if ((sample_rate = srs_audio_sample_rate_from_number(sr)) == SrsAudioSampleRateForbidden) { + sample_rate = srs_audio_sample_rate_guess_number(sr); + } + + if (mp4a->samplesize == 16) { + sound_bits = SrsAudioSampleBits16bit; + } else { + sound_bits = SrsAudioSampleBits8bit; + } + + if (mp4a->channelcount == 2) { + channels = SrsAudioChannelsStereo; + } else { + channels = SrsAudioChannelsMono; + } + } + + SrsMp4AvccBox* avcc = vide? vide->avcc():NULL; + SrsMp4DecoderSpecificInfo* asc = soun? 
soun->asc():NULL; + if (vide && !avcc) { + return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing video sequence header"); + } + if (soun && !asc && soun->soun_codec() == SrsAudioCodecIdAAC) { + return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "missing audio sequence header"); + } + + vcodec = vide?vide->vide_codec():SrsVideoCodecIdForbidden; + acodec = soun?soun->soun_codec():SrsAudioCodecIdForbidden; + + if (avcc && !avcc->avc_config.empty()) { + pavcc = avcc->avc_config; + } + if (asc && !asc->asc.empty()) { + pasc = asc->asc; + } + + // Build the samples structure from moov. + if ((err = samples->load(moov)) != srs_success) { + return srs_error_wrap(err, "load samples"); + } + + stringstream ss; ss << "dur=" << mvhd->duration() << "ms"; // video codec. ss << ", vide=" << moov->nb_vide_tracks() << "(" @@ -5640,224 +6283,602 @@ srs_error_t SrsMp4Decoder::parse_moov(SrsMp4MovieBox* moov) << "," << srs_audio_sample_rate2str(sample_rate) << ")"; - srs_trace("MP4 moov %s", ss.str().c_str()); + srs_trace("MP4 moov %s", ss.str().c_str()); + + return err; +} + +srs_error_t SrsMp4Decoder::load_next_box(SrsMp4Box** ppbox, uint32_t required_box_type) +{ + srs_error_t err = srs_success; + + while (true) { + SrsMp4Box* box = NULL; + // Note that we should use SrsAutoFree to free the ptr which is set later. + SrsAutoFree(SrsMp4Box, box); + + if ((err = do_load_next_box(&box, required_box_type)) != srs_success) { + return srs_error_wrap(err, "load box"); + } + + if (!required_box_type || (uint32_t)box->type == required_box_type) { + *ppbox = box; + box = NULL; + break; + } + } + + return err; +} + +srs_error_t SrsMp4Decoder::do_load_next_box(SrsMp4Box** ppbox, uint32_t required_box_type) +{ + srs_error_t err = srs_success; + + while (true) { + SrsMp4Box* box = NULL; + + if ((err = br->read(stream, &box)) != srs_success) { + return srs_error_wrap(err, "read box"); + } + + SrsUniquePtr buffer(new SrsBuffer(stream->bytes(), stream->length())); + + // Decode the box: + // 1. 
Any box, when no box type is required. + // 2. Matched box, when box type match the required type. + // 3. Mdat box, always decode the mdat because we only decode the header of it. + if (!required_box_type || (uint32_t)box->type == required_box_type || box->is_mdat()) { + err = box->decode(buffer.get()); + } + + // Skip the box from stream, move stream to next box. + // For mdat box, skip the content in stream or underylayer reader. + // For other boxes, skip it from stream because we already decoded it or ignore it. + if (err == srs_success) { + err = br->skip(box, stream); + } + + if (err != srs_success) { + srs_freep(box); + err = srs_error_wrap(err, "decode box"); + } else { + *ppbox = box; + } + + break; + } + + return err; +} + +SrsMp4Encoder::SrsMp4Encoder() +{ + wsio = NULL; + mdat_bytes = 0; + mdat_offset = 0; + nb_audios = nb_videos = 0; + samples = new SrsMp4SampleManager(); + aduration = vduration = 0; + width = height = 0; + + acodec = SrsAudioCodecIdForbidden; + sample_rate = SrsAudioSampleRateForbidden; + sound_bits = SrsAudioSampleBitsForbidden; + channels = SrsAudioChannelsForbidden; + vcodec = SrsVideoCodecIdForbidden; +} + +SrsMp4Encoder::~SrsMp4Encoder() +{ + srs_freep(samples); +} + +srs_error_t SrsMp4Encoder::initialize(ISrsWriteSeeker* ws) +{ + srs_error_t err = srs_success; + + wsio = ws; + + // Write ftyp box. + if (true) { + SrsUniquePtr ftyp(new SrsMp4FileTypeBox()); + + ftyp->major_brand = SrsMp4BoxBrandISOM; + ftyp->minor_version = 512; + ftyp->set_compatible_brands(SrsMp4BoxBrandISOM, SrsMp4BoxBrandISO2, SrsMp4BoxBrandMP41); + + int nb_data = ftyp->nb_bytes(); + std::vector data(nb_data); + + SrsUniquePtr buffer(new SrsBuffer(&data[0], nb_data)); + if ((err = ftyp->encode(buffer.get())) != srs_success) { + return srs_error_wrap(err, "encode ftyp"); + } + + // TODO: FIXME: Ensure write ok. + if ((err = wsio->write(&data[0], nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write ftyp"); + } + } + + // 8B reserved free box. 
+ if (true) { + SrsUniquePtr freeb(new SrsMp4FreeSpaceBox(SrsMp4BoxTypeFREE)); + + int nb_data = freeb->nb_bytes(); + std::vector data(nb_data); + + SrsUniquePtr buffer(new SrsBuffer(&data[0], nb_data)); + if ((err = freeb->encode(buffer.get())) != srs_success) { + return srs_error_wrap(err, "encode free box"); + } + + if ((err = wsio->write(&data[0], nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write free box"); + } + } + + // Write mdat box. + if (true) { + // Write empty mdat box, + // its payload will be writen by samples, + // and we will update its header(size) when flush. + SrsUniquePtr mdat(new SrsMp4MediaDataBox()); + + // Update the mdat box from this offset. + if ((err = wsio->lseek(0, SEEK_CUR, &mdat_offset)) != srs_success) { + return srs_error_wrap(err, "seek to mdat"); + } + + int nb_data = mdat->sz_header(); + SrsUniquePtr data(new uint8_t[nb_data]); + + SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); + if ((err = mdat->encode(buffer.get())) != srs_success) { + return srs_error_wrap(err, "encode mdat"); + } + + // TODO: FIXME: Ensure all bytes are writen. + if ((err = wsio->write(data.get(), nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write mdat"); + } + + mdat_bytes = 0; + } return err; } -srs_error_t SrsMp4Decoder::load_next_box(SrsMp4Box** ppbox, uint32_t required_box_type) +srs_error_t SrsMp4Encoder::write_sample( + SrsFormat* format, SrsMp4HandlerType ht, uint16_t ft, uint16_t ct, uint32_t dts, uint32_t pts, + uint8_t* sample, uint32_t nb_sample +) { + srs_error_t err = srs_success; + + SrsMp4Sample* ps = new SrsMp4Sample(); + + // For SPS/PPS or ASC, copy it to moov. 
+ bool vsh = (ht == SrsMp4HandlerTypeVIDE) && (ct == (uint16_t)SrsVideoAvcFrameTraitSequenceHeader); + bool ash = (ht == SrsMp4HandlerTypeSOUN) && (ct == (uint16_t)SrsAudioAacFrameTraitSequenceHeader); + if (vsh || ash) { + err = copy_sequence_header(format, vsh, sample, nb_sample); + srs_freep(ps); + return err; + } + + if (ht == SrsMp4HandlerTypeVIDE) { + ps->type = SrsFrameTypeVideo; + ps->frame_type = (SrsVideoAvcFrameType)ft; + ps->index = nb_videos++; + vduration = dts; + } else if (ht == SrsMp4HandlerTypeSOUN) { + ps->type = SrsFrameTypeAudio; + ps->index = nb_audios++; + aduration = dts; + } else { + srs_freep(ps); + return err; + } + ps->tbn = 1000; + ps->dts = dts; + ps->pts = pts; + + if ((err = do_write_sample(ps, sample, nb_sample)) != srs_success) { + srs_freep(ps); + return srs_error_wrap(err, "write sample"); + } + + // Append to manager to build the moov. + samples->append(ps); + + return err; +} + +srs_error_t SrsMp4Encoder::flush() { srs_error_t err = srs_success; - while (true) { - SrsMp4Box* box = NULL; - // Note that we should use SrsAutoFree to free the ptr which is set later. - SrsAutoFree(SrsMp4Box, box); + if (!nb_audios && !nb_videos) { + return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "Missing audio and video track"); + } + + // Write moov. + if (true) { + SrsUniquePtr moov(new SrsMp4MovieBox()); + + SrsMp4MovieHeaderBox* mvhd = new SrsMp4MovieHeaderBox(); + moov->set_mvhd(mvhd); + + mvhd->timescale = 1000; // Use tbn ms. + mvhd->duration_in_tbn = srs_max(vduration, aduration); + mvhd->next_track_ID = 1; // Starts from 1, increase when use it. 
+ + if (nb_videos || !pavcc.empty() || !phvcc.empty()) { + SrsMp4TrackBox* trak = new SrsMp4TrackBox(); + moov->add_trak(trak); + + SrsMp4EditBox* edts = new SrsMp4EditBox(); + trak->set_edts(edts); + + SrsMp4EditListBox* elst = new SrsMp4EditListBox(); + edts->set_elst(elst); + elst->version = 0; + + SrsMp4ElstEntry entry; + entry.segment_duration = mvhd->duration_in_tbn; + entry.media_rate_integer = 1; + elst->entries.push_back(entry); + + SrsMp4TrackHeaderBox* tkhd = new SrsMp4TrackHeaderBox(); + trak->set_tkhd(tkhd); + + tkhd->track_ID = mvhd->next_track_ID++; + tkhd->duration = vduration; + tkhd->width = (width << 16); + tkhd->height = (height << 16); + + SrsMp4MediaBox* mdia = new SrsMp4MediaBox(); + trak->set_mdia(mdia); + + SrsMp4MediaHeaderBox* mdhd = new SrsMp4MediaHeaderBox(); + mdia->set_mdhd(mdhd); + + mdhd->timescale = 1000; + mdhd->duration = vduration; + mdhd->set_language0('u'); + mdhd->set_language1('n'); + mdhd->set_language2('d'); + + SrsMp4HandlerReferenceBox* hdlr = new SrsMp4HandlerReferenceBox(); + mdia->set_hdlr(hdlr); + + hdlr->handler_type = SrsMp4HandlerTypeVIDE; + hdlr->name = "VideoHandler"; + + SrsMp4MediaInformationBox* minf = new SrsMp4MediaInformationBox(); + mdia->set_minf(minf); + + SrsMp4VideoMeidaHeaderBox* vmhd = new SrsMp4VideoMeidaHeaderBox(); + minf->set_vmhd(vmhd); + + SrsMp4DataInformationBox* dinf = new SrsMp4DataInformationBox(); + minf->set_dinf(dinf); + + SrsMp4DataReferenceBox* dref = new SrsMp4DataReferenceBox(); + dinf->set_dref(dref); + + SrsMp4DataEntryBox* url = new SrsMp4DataEntryUrlBox(); + dref->append(url); + + SrsMp4SampleTableBox* stbl = new SrsMp4SampleTableBox(); + minf->set_stbl(stbl); + + SrsMp4SampleDescriptionBox* stsd = new SrsMp4SampleDescriptionBox(); + stbl->set_stsd(stsd); + + if (vcodec == SrsVideoCodecIdAVC) { + SrsMp4VisualSampleEntry* avc1 = new SrsMp4VisualSampleEntry(SrsMp4BoxTypeAVC1); + stsd->append(avc1); + + avc1->width = width; + avc1->height = height; + avc1->data_reference_index = 
1; + + SrsMp4AvccBox* avcC = new SrsMp4AvccBox(); + avc1->set_avcC(avcC); + + avcC->avc_config = pavcc; + } else { + SrsMp4VisualSampleEntry* hev1 = new SrsMp4VisualSampleEntry(SrsMp4BoxTypeHEV1); + stsd->append(hev1); + + hev1->width = width; + hev1->height = height; + hev1->data_reference_index = 1; + + SrsMp4HvcCBox* hvcC = new SrsMp4HvcCBox(); + hev1->set_hvcC(hvcC); + + hvcC->hevc_config = phvcc; + } + } + + if (nb_audios || !pasc.empty()) { + SrsMp4TrackBox* trak = new SrsMp4TrackBox(); + moov->add_trak(trak); + + SrsMp4TrackHeaderBox* tkhd = new SrsMp4TrackHeaderBox(); + tkhd->volume = 0x0100; + trak->set_tkhd(tkhd); + + tkhd->track_ID = mvhd->next_track_ID++; + tkhd->duration = aduration; + + SrsMp4MediaBox* mdia = new SrsMp4MediaBox(); + trak->set_mdia(mdia); + + SrsMp4MediaHeaderBox* mdhd = new SrsMp4MediaHeaderBox(); + mdia->set_mdhd(mdhd); + + mdhd->timescale = 1000; + mdhd->duration = aduration; + mdhd->set_language0('u'); + mdhd->set_language1('n'); + mdhd->set_language2('d'); + + SrsMp4HandlerReferenceBox* hdlr = new SrsMp4HandlerReferenceBox(); + mdia->set_hdlr(hdlr); + + hdlr->handler_type = SrsMp4HandlerTypeSOUN; + hdlr->name = "SoundHandler"; + + SrsMp4MediaInformationBox* minf = new SrsMp4MediaInformationBox(); + mdia->set_minf(minf); + + SrsMp4SoundMeidaHeaderBox* smhd = new SrsMp4SoundMeidaHeaderBox(); + minf->set_smhd(smhd); + + SrsMp4DataInformationBox* dinf = new SrsMp4DataInformationBox(); + minf->set_dinf(dinf); + + SrsMp4DataReferenceBox* dref = new SrsMp4DataReferenceBox(); + dinf->set_dref(dref); + + SrsMp4DataEntryBox* url = new SrsMp4DataEntryUrlBox(); + dref->append(url); + + SrsMp4SampleTableBox* stbl = new SrsMp4SampleTableBox(); + minf->set_stbl(stbl); + + SrsMp4SampleDescriptionBox* stsd = new SrsMp4SampleDescriptionBox(); + stbl->set_stsd(stsd); + + SrsMp4AudioSampleEntry* mp4a = new SrsMp4AudioSampleEntry(); + mp4a->data_reference_index = 1; + mp4a->samplerate = srs_audio_sample_rate2number(sample_rate); + if (sound_bits == 
SrsAudioSampleBits16bit) { + mp4a->samplesize = 16; + } else { + mp4a->samplesize = 8; + } + if (channels == SrsAudioChannelsStereo) { + mp4a->channelcount = 2; + } else { + mp4a->channelcount = 1; + } + stsd->append(mp4a); + + SrsMp4EsdsBox* esds = new SrsMp4EsdsBox(); + mp4a->set_esds(esds); + + SrsMp4ES_Descriptor* es = esds->es; + es->ES_ID = 0x02; + + SrsMp4DecoderConfigDescriptor& desc = es->decConfigDescr; + desc.objectTypeIndication = get_audio_object_type(); + desc.streamType = SrsMp4StreamTypeAudioStream; + srs_freep(desc.decSpecificInfo); + + if (SrsMp4ObjectTypeAac == desc.objectTypeIndication) { + SrsMp4DecoderSpecificInfo* asc = new SrsMp4DecoderSpecificInfo(); + desc.decSpecificInfo = asc; + asc->asc = pasc; + } + } + + if ((err = samples->write(moov.get())) != srs_success) { + return srs_error_wrap(err, "write samples"); + } + + int nb_data = moov->nb_bytes(); + SrsUniquePtr data(new uint8_t[nb_data]); - if ((err = do_load_next_box(&box, required_box_type)) != srs_success) { - return srs_error_wrap(err, "load box"); + SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); + if ((err = moov->encode(buffer.get())) != srs_success) { + return srs_error_wrap(err, "encode moov"); } - if (!required_box_type || (uint32_t)box->type == required_box_type) { - *ppbox = box; - box = NULL; - break; + // TODO: FIXME: Ensure all bytes are writen. + if ((err = wsio->write(data.get(), nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write moov"); } } - return err; -} + // Write mdat box. + if (true) { + // Write mdat box with size of data, + // its payload already writen by samples, + // and we will update its header(size) when flush. + SrsUniquePtr mdat(new SrsMp4MediaDataBox()); -srs_error_t SrsMp4Decoder::do_load_next_box(SrsMp4Box** ppbox, uint32_t required_box_type) -{ - srs_error_t err = srs_success; - - while (true) { - SrsMp4Box* box = NULL; + // Update the size of mdat first, for over 2GB file. 
+ mdat->nb_data = mdat_bytes; + mdat->update_size(); - if ((err = br->read(stream, &box)) != srs_success) { - return srs_error_wrap(err, "read box"); - } + int nb_data = mdat->sz_header(); + SrsUniquePtr data(new uint8_t[nb_data]); - SrsUniquePtr buffer(new SrsBuffer(stream->bytes(), stream->length())); + SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); + if ((err = mdat->encode(buffer.get())) != srs_success) { + return srs_error_wrap(err, "encode mdat"); + } - // Decode the box: - // 1. Any box, when no box type is required. - // 2. Matched box, when box type match the required type. - // 3. Mdat box, always decode the mdat because we only decode the header of it. - if (!required_box_type || (uint32_t)box->type == required_box_type || box->is_mdat()) { - err = box->decode(buffer.get()); + // We might adjust the offset of mdat, for large size, 2GB+ as such. + if (nb_data > 8) { + // For large size, the header of mdat MUST be 16. + if (nb_data != 16) { + return srs_error_new(ERROR_MP4_ILLEGAL_MDAT, "Invalid mdat header size %d", nb_data); + } + // Use large size, to the start of reserved free box. + mdat_offset -= 8; } - - // Skip the box from stream, move stream to next box. - // For mdat box, skip the content in stream or underylayer reader. - // For other boxes, skip it from stream because we already decoded it or ignore it. - if (err == srs_success) { - err = br->skip(box, stream); + + // Seek to the start of mdat. + if ((err = wsio->lseek(mdat_offset, SEEK_SET, NULL)) != srs_success) { + return srs_error_wrap(err, "seek to mdat"); } - if (err != srs_success) { - srs_freep(box); - err = srs_error_wrap(err, "decode box"); - } else { - *ppbox = box; + // TODO: FIXME: Ensure all bytes are writen. 
+ if ((err = wsio->write(data.get(), nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write mdat"); } - - break; } return err; } -SrsMp4Encoder::SrsMp4Encoder() -{ - wsio = NULL; - mdat_bytes = 0; - mdat_offset = 0; - nb_audios = nb_videos = 0; - samples = new SrsMp4SampleManager(); - aduration = vduration = 0; - width = height = 0; - - acodec = SrsAudioCodecIdForbidden; - sample_rate = SrsAudioSampleRateForbidden; - sound_bits = SrsAudioSampleBitsForbidden; - channels = SrsAudioChannelsForbidden; - vcodec = SrsVideoCodecIdForbidden; -} - -SrsMp4Encoder::~SrsMp4Encoder() -{ - srs_freep(samples); -} - -srs_error_t SrsMp4Encoder::initialize(ISrsWriteSeeker* ws) +srs_error_t SrsMp4Encoder::copy_sequence_header(SrsFormat* format, bool vsh, uint8_t* sample, uint32_t nb_sample) { srs_error_t err = srs_success; - - wsio = ws; - - // Write ftyp box. - if (true) { - SrsUniquePtr ftyp(new SrsMp4FileTypeBox()); - - ftyp->major_brand = SrsMp4BoxBrandISOM; - ftyp->minor_version = 512; - ftyp->set_compatible_brands(SrsMp4BoxBrandISOM, SrsMp4BoxBrandISO2, SrsMp4BoxBrandMP41); - - int nb_data = ftyp->nb_bytes(); - std::vector data(nb_data); - - SrsUniquePtr buffer(new SrsBuffer(&data[0], nb_data)); - if ((err = ftyp->encode(buffer.get())) != srs_success) { - return srs_error_wrap(err, "encode ftyp"); - } - - // TODO: FIXME: Ensure write ok. - if ((err = wsio->write(&data[0], nb_data, NULL)) != srs_success) { - return srs_error_wrap(err, "write ftyp"); - } - } - - // 8B reserved free box. 
- if (true) { - SrsUniquePtr freeb(new SrsMp4FreeSpaceBox(SrsMp4BoxTypeFREE)); - int nb_data = freeb->nb_bytes(); - std::vector data(nb_data); + if (vsh) { + // AVC + if (format->vcodec->id == SrsVideoCodecIdAVC && !pavcc.empty()) { + if (nb_sample == (uint32_t)pavcc.size() && srs_bytes_equals(sample, &pavcc[0], (int)pavcc.size())) { + return err; + } - SrsUniquePtr buffer(new SrsBuffer(&data[0], nb_data)); - if ((err = freeb->encode(buffer.get())) != srs_success) { - return srs_error_wrap(err, "encode free box"); + return srs_error_new(ERROR_MP4_AVCC_CHANGE, "doesn't support avcc change"); } + // HEVC + if (format->vcodec->id == SrsVideoCodecIdHEVC && !phvcc.empty()) { + if (nb_sample == (uint32_t)phvcc.size() && srs_bytes_equals(sample, &phvcc[0], (int)phvcc.size())) { + return err; + } - if ((err = wsio->write(&data[0], nb_data, NULL)) != srs_success) { - return srs_error_wrap(err, "write free box"); + return srs_error_new(ERROR_MP4_HVCC_CHANGE, "doesn't support hvcC change"); } } - // Write mdat box. - if (true) { - // Write empty mdat box, - // its payload will be writen by samples, - // and we will update its header(size) when flush. - SrsUniquePtr mdat(new SrsMp4MediaDataBox()); - - // Update the mdat box from this offset. 
- if ((err = wsio->lseek(0, SEEK_CUR, &mdat_offset)) != srs_success) { - return srs_error_wrap(err, "seek to mdat"); + if (!vsh && !pasc.empty()) { + if (nb_sample == (uint32_t)pasc.size() && srs_bytes_equals(sample, &pasc[0], (int)pasc.size())) { + return err; } - int nb_data = mdat->sz_header(); - SrsUniquePtr data(new uint8_t[nb_data]); - - SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); - if ((err = mdat->encode(buffer.get())) != srs_success) { - return srs_error_wrap(err, "encode mdat"); + return srs_error_new(ERROR_MP4_ASC_CHANGE, "doesn't support asc change"); + } + + if (vsh) { + if (format->vcodec->id == SrsVideoCodecIdHEVC) { + phvcc = std::vector(sample, sample + nb_sample); + } else { + pavcc = std::vector(sample, sample + nb_sample); } - - // TODO: FIXME: Ensure all bytes are writen. - if ((err = wsio->write(data.get(), nb_data, NULL)) != srs_success) { - return srs_error_wrap(err, "write mdat"); + if (format && format->vcodec) { + width = format->vcodec->width; + height = format->vcodec->height; } - - mdat_bytes = 0; + } + + if (!vsh) { + pasc = std::vector(sample, sample + nb_sample); } return err; } -srs_error_t SrsMp4Encoder::write_sample( - SrsFormat* format, SrsMp4HandlerType ht, uint16_t ft, uint16_t ct, uint32_t dts, uint32_t pts, - uint8_t* sample, uint32_t nb_sample -) { +srs_error_t SrsMp4Encoder::do_write_sample(SrsMp4Sample* ps, uint8_t* sample, uint32_t nb_sample) +{ srs_error_t err = srs_success; - SrsMp4Sample* ps = new SrsMp4Sample(); + ps->nb_data = nb_sample; + // Never copy data, for we already writen to writer. + ps->data = NULL; - // For SPS/PPS or ASC, copy it to moov. 
- bool vsh = (ht == SrsMp4HandlerTypeVIDE) && (ct == (uint16_t)SrsVideoAvcFrameTraitSequenceHeader); - bool ash = (ht == SrsMp4HandlerTypeSOUN) && (ct == (uint16_t)SrsAudioAacFrameTraitSequenceHeader); - if (vsh || ash) { - err = copy_sequence_header(format, vsh, sample, nb_sample); - srs_freep(ps); - return err; + // Update the mdat box from this offset. + if ((err = wsio->lseek(0, SEEK_CUR, &ps->offset)) != srs_success) { + return srs_error_wrap(err, "seek to offset in mdat"); } - if (ht == SrsMp4HandlerTypeVIDE) { - ps->type = SrsFrameTypeVideo; - ps->frame_type = (SrsVideoAvcFrameType)ft; - ps->index = nb_videos++; - vduration = dts; - } else if (ht == SrsMp4HandlerTypeSOUN) { - ps->type = SrsFrameTypeAudio; - ps->index = nb_audios++; - aduration = dts; - } else { - srs_freep(ps); - return err; + // TODO: FIXME: Ensure all bytes are writen. + if ((err = wsio->write(sample, nb_sample, NULL)) != srs_success) { + return srs_error_wrap(err, "write sample"); } - ps->tbn = 1000; - ps->dts = dts; - ps->pts = pts; - if ((err = do_write_sample(ps, sample, nb_sample)) != srs_success) { - srs_freep(ps); - return srs_error_wrap(err, "write sample"); + mdat_bytes += nb_sample; + + return err; +} + +SrsMp4ObjectType SrsMp4Encoder::get_audio_object_type() +{ + switch (acodec) { + case SrsAudioCodecIdAAC: + return SrsMp4ObjectTypeAac; + case SrsAudioCodecIdMP3: + return (srs_audio_sample_rate2number(sample_rate) > 24000) ? SrsMp4ObjectTypeMp1a : SrsMp4ObjectTypeMp3; // 11172 - 3 + default: + return SrsMp4ObjectTypeForbidden; } - - // Append to manager to build the moov. 
- samples->append(ps); - - return err; } -srs_error_t SrsMp4Encoder::flush() +SrsMp4M2tsInitEncoder::SrsMp4M2tsInitEncoder() +{ + writer = NULL; + crypt_byte_block_ = 0; + skip_byte_block_ = 0; + iv_size_ = 0; + is_protected_ = false; +} + +SrsMp4M2tsInitEncoder::~SrsMp4M2tsInitEncoder() +{ +} + +srs_error_t SrsMp4M2tsInitEncoder::initialize(ISrsWriter* w) +{ + writer = w; + return srs_success; +} + +void SrsMp4M2tsInitEncoder::config_encryption(uint8_t crypt_byte_block, uint8_t skip_byte_block, unsigned char* kid, unsigned char* iv, uint8_t iv_size) +{ + srs_assert(crypt_byte_block + skip_byte_block == 10); + srs_assert(iv_size == 8 || iv_size == 16); + crypt_byte_block_ = crypt_byte_block; + skip_byte_block_ = skip_byte_block; + memcpy(kid_, kid, 16); + memcpy(iv_, iv, iv_size); + iv_size_ = iv_size; + is_protected_ = true; +} + +srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) { srs_error_t err = srs_success; - if (!nb_audios && !nb_videos) { - return srs_error_new(ERROR_MP4_ILLEGAL_MOOV, "Missing audio and video track"); + // Write ftyp box. + if (true) { + SrsUniquePtr ftyp(new SrsMp4FileTypeBox()); + + ftyp->major_brand = SrsMp4BoxBrandISO5; + ftyp->minor_version = 512; + ftyp->set_compatible_brands(SrsMp4BoxBrandISO6, SrsMp4BoxBrandMP41); + + if ((err = srs_mp4_write_box(writer, ftyp.get())) != srs_success) { + return srs_error_wrap(err, "write ftyp"); + } } // Write moov. @@ -5868,32 +6889,20 @@ srs_error_t SrsMp4Encoder::flush() moov->set_mvhd(mvhd); mvhd->timescale = 1000; // Use tbn ms. - mvhd->duration_in_tbn = srs_max(vduration, aduration); - mvhd->next_track_ID = 1; // Starts from 1, increase when use it. 
+ mvhd->duration_in_tbn = 0; + mvhd->next_track_ID = tid; - if (nb_videos || !pavcc.empty() || !phvcc.empty()) { + if (video) { SrsMp4TrackBox* trak = new SrsMp4TrackBox(); moov->add_trak(trak); - - SrsMp4EditBox* edts = new SrsMp4EditBox(); - trak->set_edts(edts); - - SrsMp4EditListBox* elst = new SrsMp4EditListBox(); - edts->set_elst(elst); - elst->version = 0; - - SrsMp4ElstEntry entry; - entry.segment_duration = mvhd->duration_in_tbn; - entry.media_rate_integer = 1; - elst->entries.push_back(entry); SrsMp4TrackHeaderBox* tkhd = new SrsMp4TrackHeaderBox(); trak->set_tkhd(tkhd); tkhd->track_ID = mvhd->next_track_ID++; - tkhd->duration = vduration; - tkhd->width = (width << 16); - tkhd->height = (height << 16); + tkhd->duration = 0; + tkhd->width = (format->vcodec->width << 16); + tkhd->height = (format->vcodec->height << 16); SrsMp4MediaBox* mdia = new SrsMp4MediaBox(); trak->set_mdia(mdia); @@ -5902,7 +6911,7 @@ srs_error_t SrsMp4Encoder::flush() mdia->set_mdhd(mdhd); mdhd->timescale = 1000; - mdhd->duration = vduration; + mdhd->duration = 0; mdhd->set_language0('u'); mdhd->set_language1('n'); mdhd->set_language2('d'); @@ -5934,34 +6943,62 @@ srs_error_t SrsMp4Encoder::flush() SrsMp4SampleDescriptionBox* stsd = new SrsMp4SampleDescriptionBox(); stbl->set_stsd(stsd); - if (vcodec == SrsVideoCodecIdAVC) { + if (format->vcodec->id == SrsVideoCodecIdAVC) { SrsMp4VisualSampleEntry* avc1 = new SrsMp4VisualSampleEntry(SrsMp4BoxTypeAVC1); stsd->append(avc1); - avc1->width = width; - avc1->height = height; + avc1->width = format->vcodec->width; + avc1->height = format->vcodec->height; avc1->data_reference_index = 1; SrsMp4AvccBox* avcC = new SrsMp4AvccBox(); avc1->set_avcC(avcC); - avcC->avc_config = pavcc; + avcC->avc_config = format->vcodec->avc_extra_data; + + if (is_protected_ && ((err = config_sample_description_encryption(avc1)) != srs_success)) { + return srs_error_wrap(err, "encrypt avc1 box"); + } } else { SrsMp4VisualSampleEntry* hev1 = new 
SrsMp4VisualSampleEntry(SrsMp4BoxTypeHEV1); stsd->append(hev1); - hev1->width = width; - hev1->height = height; + hev1->width = format->vcodec->width; + hev1->height = format->vcodec->height; hev1->data_reference_index = 1; SrsMp4HvcCBox* hvcC = new SrsMp4HvcCBox(); hev1->set_hvcC(hvcC); - hvcC->hevc_config = phvcc; + hvcC->hevc_config = format->vcodec->avc_extra_data; + + if (is_protected_ && ((err = config_sample_description_encryption(hev1)) != srs_success)) { + return srs_error_wrap(err, "encrypt hev1 box"); + } } - } - - if (nb_audios || !pasc.empty()) { + + SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); + stbl->set_stts(stts); + + SrsMp4Sample2ChunkBox* stsc = new SrsMp4Sample2ChunkBox(); + stbl->set_stsc(stsc); + + SrsMp4SampleSizeBox* stsz = new SrsMp4SampleSizeBox(); + stbl->set_stsz(stsz); + + // TODO: FIXME: need to check using stco or co64? + SrsMp4ChunkOffsetBox* stco = new SrsMp4ChunkOffsetBox(); + stbl->set_stco(stco); + + SrsMp4MovieExtendsBox* mvex = new SrsMp4MovieExtendsBox(); + moov->set_mvex(mvex); + + SrsMp4TrackExtendsBox* trex = new SrsMp4TrackExtendsBox(); + mvex->add_trex(trex); + + trex->track_ID = tid; + trex->default_sample_description_index = 1; + } else { SrsMp4TrackBox* trak = new SrsMp4TrackBox(); moov->add_trak(trak); @@ -5970,7 +7007,7 @@ srs_error_t SrsMp4Encoder::flush() trak->set_tkhd(tkhd); tkhd->track_ID = mvhd->next_track_ID++; - tkhd->duration = aduration; + tkhd->duration = 0; SrsMp4MediaBox* mdia = new SrsMp4MediaBox(); trak->set_mdia(mdia); @@ -5979,7 +7016,7 @@ srs_error_t SrsMp4Encoder::flush() mdia->set_mdhd(mdhd); mdhd->timescale = 1000; - mdhd->duration = aduration; + mdhd->duration = 0; mdhd->set_language0('u'); mdhd->set_language1('n'); mdhd->set_language2('d'); @@ -6013,13 +7050,13 @@ srs_error_t SrsMp4Encoder::flush() SrsMp4AudioSampleEntry* mp4a = new SrsMp4AudioSampleEntry(); mp4a->data_reference_index = 1; - mp4a->samplerate = srs_audio_sample_rate2number(sample_rate); - if 
(sound_bits == SrsAudioSampleBits16bit) { + mp4a->samplerate = uint32_t(srs_flv_srates[format->acodec->sound_rate]) << 16; + if (format->acodec->sound_size == SrsAudioSampleBits16bit) { mp4a->samplesize = 16; } else { mp4a->samplesize = 8; } - if (channels == SrsAudioChannelsStereo) { + if (format->acodec->sound_type == SrsAudioChannelsStereo) { mp4a->channelcount = 2; } else { mp4a->channelcount = 1; @@ -6028,184 +7065,55 @@ srs_error_t SrsMp4Encoder::flush() SrsMp4EsdsBox* esds = new SrsMp4EsdsBox(); mp4a->set_esds(esds); + + if (is_protected_ && ((err = config_sample_description_encryption(mp4a)) != srs_success)) { + return srs_error_wrap(err, "encrypt mp4a box"); + } SrsMp4ES_Descriptor* es = esds->es; es->ES_ID = 0x02; SrsMp4DecoderConfigDescriptor& desc = es->decConfigDescr; - desc.objectTypeIndication = get_audio_object_type(); + desc.objectTypeIndication = SrsMp4ObjectTypeAac; desc.streamType = SrsMp4StreamTypeAudioStream; srs_freep(desc.decSpecificInfo); - if (SrsMp4ObjectTypeAac == desc.objectTypeIndication) { - SrsMp4DecoderSpecificInfo* asc = new SrsMp4DecoderSpecificInfo(); - desc.decSpecificInfo = asc; - asc->asc = pasc; - } - } - - if ((err = samples->write(moov.get())) != srs_success) { - return srs_error_wrap(err, "write samples"); - } - - int nb_data = moov->nb_bytes(); - SrsUniquePtr data(new uint8_t[nb_data]); - - SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); - if ((err = moov->encode(buffer.get())) != srs_success) { - return srs_error_wrap(err, "encode moov"); - } - - // TODO: FIXME: Ensure all bytes are writen. - if ((err = wsio->write(data.get(), nb_data, NULL)) != srs_success) { - return srs_error_wrap(err, "write moov"); - } - } - - // Write mdat box. - if (true) { - // Write mdat box with size of data, - // its payload already writen by samples, - // and we will update its header(size) when flush. - SrsUniquePtr mdat(new SrsMp4MediaDataBox()); - - // Update the size of mdat first, for over 2GB file. 
- mdat->nb_data = mdat_bytes; - mdat->update_size(); - - int nb_data = mdat->sz_header(); - SrsUniquePtr data(new uint8_t[nb_data]); - - SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); - if ((err = mdat->encode(buffer.get())) != srs_success) { - return srs_error_wrap(err, "encode mdat"); - } - - // We might adjust the offset of mdat, for large size, 2GB+ as such. - if (nb_data > 8) { - // For large size, the header of mdat MUST be 16. - if (nb_data != 16) { - return srs_error_new(ERROR_MP4_ILLEGAL_MDAT, "Invalid mdat header size %d", nb_data); - } - // Use large size, to the start of reserved free box. - mdat_offset -= 8; - } - - // Seek to the start of mdat. - if ((err = wsio->lseek(mdat_offset, SEEK_SET, NULL)) != srs_success) { - return srs_error_wrap(err, "seek to mdat"); - } - - // TODO: FIXME: Ensure all bytes are writen. - if ((err = wsio->write(data.get(), nb_data, NULL)) != srs_success) { - return srs_error_wrap(err, "write mdat"); - } - } - - return err; -} - -srs_error_t SrsMp4Encoder::copy_sequence_header(SrsFormat* format, bool vsh, uint8_t* sample, uint32_t nb_sample) -{ - srs_error_t err = srs_success; - - if (vsh) { - // AVC - if (format->vcodec->id == SrsVideoCodecIdAVC && !pavcc.empty()) { - if (nb_sample == (uint32_t)pavcc.size() && srs_bytes_equals(sample, &pavcc[0], (int)pavcc.size())) { - return err; - } - - return srs_error_new(ERROR_MP4_AVCC_CHANGE, "doesn't support avcc change"); - } - // HEVC - if (format->vcodec->id == SrsVideoCodecIdHEVC && !phvcc.empty()) { - if (nb_sample == (uint32_t)phvcc.size() && srs_bytes_equals(sample, &phvcc[0], (int)phvcc.size())) { - return err; - } - - return srs_error_new(ERROR_MP4_HVCC_CHANGE, "doesn't support hvcC change"); - } - } - - if (!vsh && !pasc.empty()) { - if (nb_sample == (uint32_t)pasc.size() && srs_bytes_equals(sample, &pasc[0], (int)pasc.size())) { - return err; - } - - return srs_error_new(ERROR_MP4_ASC_CHANGE, "doesn't support asc change"); - } - - if (vsh) { - if 
(format->vcodec->id == SrsVideoCodecIdHEVC) { - phvcc = std::vector(sample, sample + nb_sample); - } else { - pavcc = std::vector(sample, sample + nb_sample); - } - if (format && format->vcodec) { - width = format->vcodec->width; - height = format->vcodec->height; + SrsMp4DecoderSpecificInfo* asc = new SrsMp4DecoderSpecificInfo(); + desc.decSpecificInfo = asc; + asc->asc = format->acodec->aac_extra_data; + + SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); + stbl->set_stts(stts); + + SrsMp4Sample2ChunkBox* stsc = new SrsMp4Sample2ChunkBox(); + stbl->set_stsc(stsc); + + SrsMp4SampleSizeBox* stsz = new SrsMp4SampleSizeBox(); + stbl->set_stsz(stsz); + + // TODO: FIXME: need to check using stco or co64? + SrsMp4ChunkOffsetBox* stco = new SrsMp4ChunkOffsetBox(); + stbl->set_stco(stco); + + SrsMp4MovieExtendsBox* mvex = new SrsMp4MovieExtendsBox(); + moov->set_mvex(mvex); + + SrsMp4TrackExtendsBox* trex = new SrsMp4TrackExtendsBox(); + mvex->add_trex(trex); + + trex->track_ID = tid; + trex->default_sample_description_index = 1; } - } - - if (!vsh) { - pasc = std::vector(sample, sample + nb_sample); - } - - return err; -} -srs_error_t SrsMp4Encoder::do_write_sample(SrsMp4Sample* ps, uint8_t* sample, uint32_t nb_sample) -{ - srs_error_t err = srs_success; - - ps->nb_data = nb_sample; - // Never copy data, for we already writen to writer. - ps->data = NULL; - - // Update the mdat box from this offset. - if ((err = wsio->lseek(0, SEEK_CUR, &ps->offset)) != srs_success) { - return srs_error_wrap(err, "seek to offset in mdat"); - } - - // TODO: FIXME: Ensure all bytes are writen. 
- if ((err = wsio->write(sample, nb_sample, NULL)) != srs_success) { - return srs_error_wrap(err, "write sample"); + if ((err = srs_mp4_write_box(writer, moov.get())) != srs_success) { + return srs_error_wrap(err, "write moov"); + } } - mdat_bytes += nb_sample; - return err; } -SrsMp4ObjectType SrsMp4Encoder::get_audio_object_type() -{ - switch (acodec) { - case SrsAudioCodecIdAAC: - return SrsMp4ObjectTypeAac; - case SrsAudioCodecIdMP3: - return (srs_audio_sample_rate2number(sample_rate) > 24000) ? SrsMp4ObjectTypeMp1a : SrsMp4ObjectTypeMp3; // 11172 - 3 - default: - return SrsMp4ObjectTypeForbidden; - } -} - -SrsMp4M2tsInitEncoder::SrsMp4M2tsInitEncoder() -{ - writer = NULL; -} - -SrsMp4M2tsInitEncoder::~SrsMp4M2tsInitEncoder() -{ -} - -srs_error_t SrsMp4M2tsInitEncoder::initialize(ISrsWriter* w) -{ - writer = w; - return srs_success; -} - -srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) +srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, int v_tid, int a_tid) { srs_error_t err = srs_success; @@ -6213,7 +7121,7 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) if (true) { SrsUniquePtr ftyp(new SrsMp4FileTypeBox()); - ftyp->major_brand = SrsMp4BoxBrandISO5; + ftyp->major_brand = SrsMp4BoxBrandMP42; // SrsMp4BoxBrandISO5; ftyp->minor_version = 512; ftyp->set_compatible_brands(SrsMp4BoxBrandISO6, SrsMp4BoxBrandMP41); @@ -6231,16 +7139,17 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) mvhd->timescale = 1000; // Use tbn ms. 
mvhd->duration_in_tbn = 0; - mvhd->next_track_ID = tid; - - if (video) { + mvhd->next_track_ID = 4294967295; // 2^32 - 1 + + // write video track + if (format->vcodec) { SrsMp4TrackBox* trak = new SrsMp4TrackBox(); moov->add_trak(trak); SrsMp4TrackHeaderBox* tkhd = new SrsMp4TrackHeaderBox(); trak->set_tkhd(tkhd); - tkhd->track_ID = mvhd->next_track_ID++; + tkhd->track_ID = v_tid; tkhd->duration = 0; tkhd->width = (format->vcodec->width << 16); tkhd->height = (format->vcodec->height << 16); @@ -6296,6 +7205,10 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) avc1->set_avcC(avcC); avcC->avc_config = format->vcodec->avc_extra_data; + + if (is_protected_ && ((err = config_sample_description_encryption(avc1)) != srs_success)) { + return srs_error_wrap(err, "encrypt avc1 box"); + } } else { SrsMp4VisualSampleEntry* hev1 = new SrsMp4VisualSampleEntry(SrsMp4BoxTypeHEV1); stsd->append(hev1); @@ -6308,6 +7221,10 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) hev1->set_hvcC(hvcC); hvcC->hevc_config = format->vcodec->avc_extra_data; + + if (is_protected_ && ((err = config_sample_description_encryption(hev1)) != srs_success)) { + return srs_error_wrap(err, "encrypt hev1 box"); + } } SrsMp4DecodingTime2SampleBox* stts = new SrsMp4DecodingTime2SampleBox(); @@ -6322,16 +7239,10 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) // TODO: FIXME: need to check using stco or co64? 
SrsMp4ChunkOffsetBox* stco = new SrsMp4ChunkOffsetBox(); stbl->set_stco(stco); - - SrsMp4MovieExtendsBox* mvex = new SrsMp4MovieExtendsBox(); - moov->set_mvex(mvex); - - SrsMp4TrackExtendsBox* trex = new SrsMp4TrackExtendsBox(); - mvex->set_trex(trex); - - trex->track_ID = tid; - trex->default_sample_description_index = 1; - } else { + } + + // write audio track + if (format->acodec) { SrsMp4TrackBox* trak = new SrsMp4TrackBox(); moov->add_trak(trak); @@ -6339,7 +7250,7 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) tkhd->volume = 0x0100; trak->set_tkhd(tkhd); - tkhd->track_ID = mvhd->next_track_ID++; + tkhd->track_ID = a_tid; tkhd->duration = 0; SrsMp4MediaBox* mdia = new SrsMp4MediaBox(); @@ -6398,6 +7309,9 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) SrsMp4EsdsBox* esds = new SrsMp4EsdsBox(); mp4a->set_esds(esds); + if (is_protected_ && ((err = config_sample_description_encryption(mp4a)) != srs_success)) { + return srs_error_wrap(err, "encrypt mp4a box."); + } SrsMp4ES_Descriptor* es = esds->es; es->ES_ID = 0x02; @@ -6423,17 +7337,31 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) // TODO: FIXME: need to check using stco or co64? 
SrsMp4ChunkOffsetBox* stco = new SrsMp4ChunkOffsetBox(); stbl->set_stco(stco); - + } + + if (true) { SrsMp4MovieExtendsBox* mvex = new SrsMp4MovieExtendsBox(); moov->set_mvex(mvex); + + // video trex + if (format->vcodec) { + SrsMp4TrackExtendsBox* v_trex = new SrsMp4TrackExtendsBox(); + mvex->add_trex(v_trex); - SrsMp4TrackExtendsBox* trex = new SrsMp4TrackExtendsBox(); - mvex->set_trex(trex); + v_trex->track_ID = v_tid; + v_trex->default_sample_description_index = 1; + } + + // audio trex + if (format->acodec) { + SrsMp4TrackExtendsBox* a_trex = new SrsMp4TrackExtendsBox(); + mvex->add_trex(a_trex); - trex->track_ID = tid; - trex->default_sample_description_index = 1; + a_trex->track_ID = a_tid; + a_trex->default_sample_description_index = 1; + } } - + if ((err = srs_mp4_write_box(writer, moov.get())) != srs_success) { return srs_error_wrap(err, "write moov"); } @@ -6442,6 +7370,60 @@ srs_error_t SrsMp4M2tsInitEncoder::write(SrsFormat* format, bool video, int tid) return err; } +/** + * box->type = 'encv' or 'enca' + * |encv| + * | |sinf| + * | | |frma| + * | | |schm| + * | | |schi| + * | | | |tenc| + */ +srs_error_t SrsMp4M2tsInitEncoder::config_sample_description_encryption(SrsMp4SampleEntry* box) +{ + srs_error_t err = srs_success; + + bool is_video_sample = false; + SrsMp4BoxType original_type = box->type; + + if (original_type == SrsMp4BoxTypeAVC1 || original_type == SrsMp4BoxTypeHEV1) + { + box->type = SrsMp4BoxTypeENCV; + is_video_sample = true; + } else if (original_type == SrsMp4BoxTypeMP4A) { + box->type = SrsMp4BoxTypeENCA; + } else { + return srs_error_new(ERROR_MP4_BOX_ILLEGAL_TYPE, "unknown sample type 0x%x to encrypt", original_type); + } + + SrsMp4ProtectionSchemeInfoBox* sinf = new SrsMp4ProtectionSchemeInfoBox(); + box->append(sinf); + + SrsMp4OriginalFormatBox* frma = new SrsMp4OriginalFormatBox(original_type); + sinf->set_frma(frma); + + SrsMp4SchemeTypeBox* schm = new SrsMp4SchemeTypeBox(); + schm->scheme_type = SrsMp4CENSchemeCBCS; + 
schm->scheme_version = 0x00010000; + sinf->set_schm(schm); + + SrsMp4SchemeInfoBox* schi = new SrsMp4SchemeInfoBox(); + SrsMp4TrackEncryptionBox* tenc = new SrsMp4TrackEncryptionBox(); + tenc->version = 1; + tenc->default_crypt_byte_block = is_video_sample ? crypt_byte_block_ : 0 ; + tenc->default_skip_byte_block = is_video_sample ? skip_byte_block_ : 0; + tenc->default_is_protected = 1; + tenc->default_per_sample_IV_size = 0; + tenc->default_constant_IV_size = iv_size_; + memcpy(tenc->default_constant_IV, iv_, iv_size_); + memcpy(tenc->default_KID, kid_, 16); + + schi->append(tenc); + sinf->set_schi(schi); + + return err; +} + SrsMp4M2tsSegmentEncoder::SrsMp4M2tsSegmentEncoder() { writer = NULL; @@ -6568,7 +7550,7 @@ srs_error_t SrsMp4M2tsSegmentEncoder::flush(uint64_t& dts) mfhd->sequence_number = sequence_number; SrsMp4TrackFragmentBox* traf = new SrsMp4TrackFragmentBox(); - moof->set_traf(traf); + moof->add_traf(traf); SrsMp4TrackFragmentHeaderBox* tfhd = new SrsMp4TrackFragmentHeaderBox(); traf->set_tfhd(tfhd); @@ -6585,7 +7567,7 @@ srs_error_t SrsMp4M2tsSegmentEncoder::flush(uint64_t& dts) SrsMp4TrackFragmentRunBox* trun = new SrsMp4TrackFragmentRunBox(); traf->set_trun(trun); - if ((err = samples->write(moof.get(), dts)) != srs_success) { + if ((err = samples->write(traf, dts)) != srs_success) { return srs_error_wrap(err, "write samples"); } @@ -6635,3 +7617,254 @@ srs_error_t SrsMp4M2tsSegmentEncoder::flush(uint64_t& dts) return err; } +SrsFmp4SegmentEncoder::SrsFmp4SegmentEncoder() +{ + writer_ = NULL; + sequence_number_ = 0; + decode_basetime_ = 0; + audio_track_id_ = 0; + video_track_id_ = 0; + nb_audios_ = 0; + nb_videos_ = 0; + styp_bytes_ = 0; + mdat_audio_bytes_ = 0; + mdat_video_bytes_ = 0; + audio_samples_ = new SrsMp4SampleManager(); + video_samples_ = new SrsMp4SampleManager(); + + memset(iv_,0,16); + key_ = (unsigned char*)new AES_KEY(); + do_sample_encryption_ = false; +} + +SrsFmp4SegmentEncoder::~SrsFmp4SegmentEncoder() +{ + 
srs_freep(audio_samples_); + srs_freep(video_samples_); + + AES_KEY* k = (AES_KEY*)key_; + srs_freep(k); +} + + +srs_error_t SrsFmp4SegmentEncoder::initialize(ISrsWriter* w, uint32_t sequence, srs_utime_t basetime, uint32_t v_tid, uint32_t a_tid) +{ + srs_error_t err = srs_success; + + writer_ = w; + sequence_number_ = sequence; + decode_basetime_ = basetime; + video_track_id_ = v_tid; + audio_track_id_ = a_tid; + + return err; +} + +srs_error_t SrsFmp4SegmentEncoder::config_cipher(unsigned char* key, unsigned char* iv) +{ + srs_error_t err = srs_success; + + memcpy(this->iv_, iv, 16); + + AES_KEY* k = (AES_KEY*)this->key_; + if (AES_set_encrypt_key(key, 16 * 8, k)) { + return srs_error_new(ERROR_SYSTEM_FILE_WRITE, "set aes key failed"); + } + do_sample_encryption_ = true; + + return err; +} + +srs_error_t SrsFmp4SegmentEncoder::write_sample(SrsMp4HandlerType ht, uint16_t ft, + uint32_t dts, uint32_t pts, uint8_t* sample, uint32_t nb_sample) +{ + srs_error_t err = srs_success; + + SrsMp4Sample* ps = new SrsMp4Sample(); + + if (ht == SrsMp4HandlerTypeVIDE) { + ps->type = SrsFrameTypeVideo; + ps->frame_type = (SrsVideoAvcFrameType)ft; + ps->index = nb_videos_++; + video_samples_->append(ps); + mdat_video_bytes_ += nb_sample; + } else if (ht == SrsMp4HandlerTypeSOUN) { + ps->type = SrsFrameTypeAudio; + ps->index = nb_audios_++; + audio_samples_->append(ps); + mdat_audio_bytes_ += nb_sample; + } else { + srs_freep(ps); + return err; + } + + ps->tbn = 1000; + ps->dts = dts; + ps->pts = pts; + + // We should copy the sample data, which is shared ptr from video/audio message. + // Furthermore, we do free the data when freeing the sample. 
+ ps->data = new uint8_t[nb_sample]; + memcpy(ps->data, sample, nb_sample); + ps->nb_data = nb_sample; + + return err; +} + +srs_error_t SrsFmp4SegmentEncoder::flush(uint64_t& dts) +{ + srs_error_t err = srs_success; + SrsMp4TrackFragmentRunBox* video_trun = NULL; + SrsMp4TrackFragmentRunBox* audio_trun = NULL; + + if (nb_videos_ == 0 && nb_audios_ == 0) { + return srs_error_new(ERROR_MP4_ILLEGAL_MDAT, "empty samples"); + } + // Create a mdat box. + // its payload will be writen by samples, + // and we will update its header(size) when flush. + SrsUniquePtr mdat(new SrsMp4MediaDataBox()); + + SrsUniquePtr moof(new SrsMp4MovieFragmentBox()); + + SrsMp4MovieFragmentHeaderBox* mfhd = new SrsMp4MovieFragmentHeaderBox(); + moof->set_mfhd(mfhd); + mfhd->sequence_number = sequence_number_; + + // write video traf + if (mdat_video_bytes_ > 0) { + // video traf + SrsMp4TrackFragmentBox* traf = new SrsMp4TrackFragmentBox(); + moof->add_traf(traf); + + SrsMp4TrackFragmentHeaderBox* tfhd = new SrsMp4TrackFragmentHeaderBox(); + traf->set_tfhd(tfhd); + + tfhd->track_id = video_track_id_; + tfhd->flags = SrsMp4TfhdFlagsDefaultBaseIsMoof; + + SrsMp4TrackFragmentDecodeTimeBox* tfdt = new SrsMp4TrackFragmentDecodeTimeBox(); + traf->set_tfdt(tfdt); + + tfdt->version = 1; + tfdt->base_media_decode_time = srsu2ms(decode_basetime_); + + SrsMp4TrackFragmentRunBox* trun = new SrsMp4TrackFragmentRunBox(); + traf->set_trun(trun); + video_trun = trun; + + if ((err = video_samples_->write(traf, dts)) != srs_success) { + return srs_error_wrap(err, "write samples"); + } + + // TODO: write senc, and optional saiz & saio + if (do_sample_encryption_) { + SrsMp4SampleEncryptionBox* senc = new SrsMp4SampleEncryptionBox(0); + // video_samples_; + vector::iterator it; + // write video sample data + for (it = video_samples_->samples.begin(); it != video_samples_->samples.end(); ++it) { + SrsMp4Sample* sample = *it; + // TODO: parse hevc|avc, nalu slice header, and calculate + // sample->data; + // 
sample->nb_data; + } + + traf->append(senc); + } + } + + // write audio traf + if (mdat_audio_bytes_ > 0) { + // audio traf + SrsMp4TrackFragmentBox* traf = new SrsMp4TrackFragmentBox(); + moof->add_traf(traf); + + SrsMp4TrackFragmentHeaderBox* tfhd = new SrsMp4TrackFragmentHeaderBox(); + traf->set_tfhd(tfhd); + + tfhd->track_id = audio_track_id_; + tfhd->flags = SrsMp4TfhdFlagsDefaultBaseIsMoof; + + SrsMp4TrackFragmentDecodeTimeBox* tfdt = new SrsMp4TrackFragmentDecodeTimeBox(); + traf->set_tfdt(tfdt); + + tfdt->version = 1; + tfdt->base_media_decode_time = srsu2ms(decode_basetime_); + + SrsMp4TrackFragmentRunBox* trun = new SrsMp4TrackFragmentRunBox(); + traf->set_trun(trun); + audio_trun = trun; + + if ((err = audio_samples_->write(traf, dts)) != srs_success) { + return srs_error_wrap(err, "write samples"); + } + + // TODO: write senc, and optional saiz & saio + if (do_sample_encryption_) { + SrsMp4SampleEncryptionBox* senc = new SrsMp4SampleEncryptionBox(0); + // this->iv_; + traf->append(senc); + } + } + + // @remark Remember the data_offset of turn is size(moof)+header(mdat) + int moof_bytes = moof->nb_bytes(); + // rewrite video data_offset + if (video_trun != NULL) { + video_trun->data_offset = (int32_t)(moof_bytes + mdat->sz_header() + 0); + } + + if (audio_trun != NULL) { + audio_trun->data_offset = (int32_t)(moof_bytes + mdat->sz_header() + mdat_video_bytes_); + } + + // srs_trace("seq: %d, moof_bytes=%d, mdat->sz_header=%d", sequence_number_, moof->nb_bytes(), mdat->sz_header()); + // srs_trace("mdat_video_bytes_ = %d, mdat_audio_bytes_ = %d", mdat_video_bytes_, mdat_audio_bytes_); + + if ((err = srs_mp4_write_box(writer_, moof.get())) != srs_success) { + return srs_error_wrap(err, "write moof"); + } + + mdat->nb_data = mdat_video_bytes_ + mdat_audio_bytes_; + // Write mdat. 
+ if (true) { + int nb_data = mdat->sz_header(); + SrsUniquePtr data(new uint8_t[nb_data]); + + SrsUniquePtr buffer(new SrsBuffer((char*)data.get(), nb_data)); + if ((err = mdat->encode(buffer.get())) != srs_success) { + return srs_error_wrap(err, "encode mdat"); + } + + // TODO: FIXME: Ensure all bytes are writen. + if ((err = writer_->write(data.get(), nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write mdat"); + } + + vector::iterator it; + // write video sample data + for (it = video_samples_->samples.begin(); it != video_samples_->samples.end(); ++it) { + SrsMp4Sample* sample = *it; + + // TODO: FIXME: Ensure all bytes are writen. + // TODO: do cbcs encryption here. sample are nalu_length + nalu data. + if ((err = writer_->write(sample->data, sample->nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write sample"); + } + } + + // write audio sample data + for (it = audio_samples_->samples.begin(); it != audio_samples_->samples.end(); ++it) { + SrsMp4Sample* sample = *it; + + // TODO: FIXME: Ensure all bytes are writen. 
+ // TODO: do cbcs encryption here + if ((err = writer_->write(sample->data, sample->nb_data, NULL)) != srs_success) { + return srs_error_wrap(err, "write sample"); + } + } + } + + return err; +} diff --git a/trunk/src/kernel/srs_kernel_mp4.hpp b/trunk/src/kernel/srs_kernel_mp4.hpp index 23805773e6..597c02a6ae 100644 --- a/trunk/src/kernel/srs_kernel_mp4.hpp +++ b/trunk/src/kernel/srs_kernel_mp4.hpp @@ -113,6 +113,27 @@ enum SrsMp4BoxType SrsMp4BoxTypeSIDX = 0x73696478, // 'sidx' SrsMp4BoxTypeHEV1 = 0x68657631, // 'hev1' SrsMp4BoxTypeHVCC = 0x68766343, // 'hvcC' + SrsMp4BoxTypeSENC = 0x73656e63, // 'senc' + SrsMp4BoxTypeSAIZ = 0x7361697a, // 'saiz' + SrsMp4BoxTypeSAIO = 0x7361696f, // 'saio' + SrsMp4BoxTypeENCV = 0x656e6376, // 'encv' + SrsMp4BoxTypeENCA = 0x656e6361, // 'enca' + SrsMp4BoxTypeSINF = 0x73696e66, // 'sinf' + SrsMp4BoxTypeSCHI = 0x73636869, // 'schi' + SrsMp4BoxTypeTENC = 0x74656e63, // 'tenc' + SrsMp4BoxTypeFRMA = 0x66726d61, // 'frma' + SrsMp4BoxTypeSCHM = 0x7363686d, // 'schm' +}; + +// Common encryption scheme types +// @see ISO-IEC-23001-7.pdf, 4.2 +enum SrsMp4CENSchemeType +{ + SrsMp4CENSchemeCENC = 0x63656e63, // 'cenc' + SrsMp4CENSchemeCBC1 = 0x63626331, // 'cbc1' + SrsMp4CENSchemeCENS = 0x63656e73, // 'cens' + SrsMp4CENSchemeCBCS = 0x63626373, // 'cbcs' + SrsMp4CENSchemeSVE1 = 0x73766531, // 'sve1' }; // 8.4.3.3 Semantics @@ -317,9 +338,9 @@ class SrsMp4MovieFragmentBox : public SrsMp4Box // Get the header of moof. virtual SrsMp4MovieFragmentHeaderBox* mfhd(); virtual void set_mfhd(SrsMp4MovieFragmentHeaderBox* v); - // Get the traf. 
- virtual SrsMp4TrackFragmentBox* traf(); - virtual void set_traf(SrsMp4TrackFragmentBox* v); + + // Let moof support more than one traf + virtual void add_traf(SrsMp4TrackFragmentBox* v); }; // 8.8.5 Movie Fragment Header Box (mfhd) @@ -499,7 +520,7 @@ class SrsMp4TrackFragmentRunBox : public SrsMp4FullBox public: // The number of samples being added in this run; also the number of rows in the following // table (the rows can be empty) - //uint32_t sample_count; + // uint32_t sample_count; // The following are optional fields public: // added to the implicit or explicit data_offset established in the track fragment header. @@ -710,8 +731,7 @@ class SrsMp4MovieExtendsBox : public SrsMp4Box virtual ~SrsMp4MovieExtendsBox(); public: // Get the track extends box. - virtual SrsMp4TrackExtendsBox* trex(); - virtual void set_trex(SrsMp4TrackExtendsBox* v); + virtual void add_trex(SrsMp4TrackExtendsBox* v); }; // 8.8.3 Track Extends Box(trex) @@ -1869,6 +1889,348 @@ class SrsMp4SegmentIndexBox : public SrsMp4Box virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); }; +// Sample auxiliary information sizes box (saiz) +// @see ISO_IEC_14496-12-base-format-2012.pdf, 8.7.8, page 62 +// @see https://github.com/gpac/mp4box.js/blob/master/src/parsing/saiz.js +// Syntax +// aligned(8) class SampleAuxiliaryInformationSizesBox extends FullBox('saiz', version=0, flags) +// { +// if (flags & 1) { +// unsigned int(32) aux_info_type; +// unsigned int(32) aux_info_type_parameter; +// } +// unsigned int(8) default_sample_info_size; +// unsigned int(32) sample_count; +// if (default_sample_info_size == 0) { +// unsigned int(8) sample_info_size[sample_count]; +// } +// } +class SrsMp4SampleAuxiliaryInfoSizeBox: public SrsMp4FullBox +{ +public: + uint32_t aux_info_type; + uint32_t aux_info_type_parameter; + + uint8_t default_sample_info_size; + uint32_t sample_count; + std::vector sample_info_sizes; + +public: + SrsMp4SampleAuxiliaryInfoSizeBox(); + virtual 
~SrsMp4SampleAuxiliaryInfoSizeBox(); + +protected: + virtual int nb_header(); + virtual srs_error_t encode_header(SrsBuffer* buf); + virtual srs_error_t decode_header(SrsBuffer* buf); +public: + virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +// Sample auxiliary information offsets box (saio) +// @see ISO_IEC_14496-12-base-format-2012.pdf, 8.7.9, page 63 +// @see https://github.com/gpac/mp4box.js/blob/master/src/parsing/saio.js +// Syntax +// aligned(8) class SampleAuxiliaryInformationOffsetsBox extends FullBox('saio', version, flags) +// { +// if (flags & 1) { +// unsigned int(32) aux_info_type; +// unsigned int(32) aux_info_type_parameter; +// } +// unsigned int(32) entry_count; +// if (version == 0) { +// unsigned int(32) offset[entry_count]; +// } else { +// unsigned int(64) offset[entry_count]; +// } +// } +class SrsMp4SampleAuxiliaryInfoOffsetBox: public SrsMp4FullBox +{ +public: + uint32_t aux_info_type; + uint32_t aux_info_type_parameter; + // uint32_t entry_count; + std::vector offsets; + +public: + SrsMp4SampleAuxiliaryInfoOffsetBox(); + virtual ~SrsMp4SampleAuxiliaryInfoOffsetBox(); + +protected: + virtual int nb_header(); + virtual srs_error_t encode_header(SrsBuffer* buf); + virtual srs_error_t decode_header(SrsBuffer* buf); +public: + virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +enum SrsMp4CencSampleEncryptionFlags +{ + SrsMp4CencSampleEncryptionTrackDefault = 0x01, + SrsMp4CencSampleEncryptionUseSubSample = 0x02, +}; + +struct SrsMp4SubSampleEncryptionInfo : public ISrsCodec +{ + uint16_t bytes_of_clear_data; + uint32_t bytes_of_protected_data; + + SrsMp4SubSampleEncryptionInfo(); + virtual ~SrsMp4SubSampleEncryptionInfo(); + + virtual uint64_t nb_bytes(); + virtual srs_error_t encode(SrsBuffer* buf); + virtual srs_error_t decode(SrsBuffer* buf); + + virtual std::stringstream& dumps(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +class 
SrsMp4SampleEncryptionEntry : public ISrsCodec +{ +public: + // if flags & 0x02 + std::vector subsample_infos; + +public: + SrsMp4SampleEncryptionEntry(SrsMp4FullBox* senc, uint8_t per_sample_iv_size); + virtual ~SrsMp4SampleEncryptionEntry(); + + virtual srs_error_t set_iv(uint8_t* iv, uint8_t iv_size); + virtual uint64_t nb_bytes(); + virtual srs_error_t encode(SrsBuffer* buf); + virtual srs_error_t decode(SrsBuffer* buf); + + virtual std::stringstream& dumps(std::stringstream& ss, SrsMp4DumpContext dc); + +private: + SrsMp4FullBox* senc_; + uint8_t per_sample_iv_size_; + uint8_t* iv_; +}; + +// Sample encryption box (senc) +// @see ISO-IEC-23001-7.pdf 7.2.1 +// @see https://cdn.standards.iteh.ai/samples/84637/c960c91d60ae4da7a2f9380bd7e08642/ISO-IEC-FDIS-23001-7.pdf +// CENC SAI: sample auxiliary information associated with a sample and containing cryptographic information +// such as initialization vector or subsample information +// @see ISO-IEC-23001-7.pdf 7.2.2 +// Syntax +// aligned(8) class SampleEncryptionBox extends FullBox(`senc`, version=0, flags) +// { +// unsigned int(32) sample_count; +// { +// unsigned int(Per_Sample_IV_Size*8) InitializationVector; +// if (flags & 0x000002) +// { +// unsigned int(16) subsample_count; +// { +// unsigned int(16) BytesOfClearData; +// unsigned int(32) BytesOfProtectedData; +// } [ subsample_count ] +// } +// } [ sample_count ] +// } +class SrsMp4SampleEncryptionBox: public SrsMp4FullBox +{ +public: + std::vector entries; + +private: + uint8_t per_sample_iv_size_; + +public: + // @see ISO-IEC-23001-7.pdf 9.1 + // Per_Sample_IV_Size has supported values: 0, 8, 16.
+ SrsMp4SampleEncryptionBox(uint8_t per_sample_iv_size); + virtual ~SrsMp4SampleEncryptionBox(); +protected: + virtual int nb_header(); + virtual srs_error_t encode_header(SrsBuffer* buf); + virtual srs_error_t decode_header(SrsBuffer* buf); +public: + virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +// Original Format Box (frma) +// @see ISO_IEC_14496-12-base-format-2012.pdf, 8.12.2, page 81 +// aligned(8) class OriginalFormatBox(codingname) extends Box ('frma') { +// unsigned int(32) data_format = codingname; +// } +class SrsMp4OriginalFormatBox : public SrsMp4Box +{ +private: + uint32_t data_format_; + +public: + SrsMp4OriginalFormatBox(uint32_t original_format); + virtual ~SrsMp4OriginalFormatBox(); + +protected: + virtual int nb_header(); + virtual srs_error_t encode_header(SrsBuffer* buf); + virtual srs_error_t decode_header(SrsBuffer* buf); +public: + virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +// Scheme Type Box (schm) +// @see ISO_IEC_14496-12-base-format-2012.pdf, 8.12.5, page 81 +// aligned(8) class SchemeTypeBox extends FullBox('schm', 0, flags) { +// unsigned int(32) scheme_type; // 4CC identifying the scheme +// unsigned int(32) scheme_version; // scheme version +// if (flags & 0x000001) { +// unsigned int(8) scheme_uri[]; // browser uri +// } +// } +// @see ISO-IEC-23001-7.pdf 4.1 +// the scheme_version field SHALL be set to 0x00010000 (Major version 1, Minor version 0).
+#define SCHM_SCHEME_URI_MAX_SIZE 128 +class SrsMp4SchemeTypeBox : public SrsMp4FullBox +{ +public: + uint32_t scheme_type; + uint32_t scheme_version; + char scheme_uri[SCHM_SCHEME_URI_MAX_SIZE]; + uint32_t scheme_uri_size; + +public: + SrsMp4SchemeTypeBox(); + virtual ~SrsMp4SchemeTypeBox(); + +public: + virtual void set_scheme_uri(char* uri, uint32_t uri_size); +protected: + virtual int nb_header(); + virtual srs_error_t encode_header(SrsBuffer* buf); + virtual srs_error_t decode_header(SrsBuffer* buf); +public: + virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +// Scheme Information Box (schi) +// @see ISO_IEC_14496-12-base-format-2012.pdf, 8.12.6, page 82 +// aligned(8) class SchemeInformationBox extends Box('schi') { +// Box scheme_specific_data[]; +// } +class SrsMp4SchemeInfoBox : public SrsMp4Box +{ +public: + SrsMp4SchemeInfoBox(); + virtual ~SrsMp4SchemeInfoBox(); +}; + +// Protection Scheme Information Box (sinf) +// @see ISO_IEC_14496-12-base-format-2012.pdf, 8.12.1, page 80 +// aligned(8) class ProtectionSchemeInfoBox(fmt) extends Box('sinf') { +// OriginalFormatBox(fmt) original_format; // frma +// SchemeTypeBox scheme_type_box; // optional +// SchemeInformationBox info; // optional +// } +class SrsMp4ProtectionSchemeInfoBox : public SrsMp4Box +{ +public: + SrsMp4ProtectionSchemeInfoBox(); + virtual ~SrsMp4ProtectionSchemeInfoBox(); + +public: + // Get the Original Format Box (frma) + virtual SrsMp4OriginalFormatBox* frma(); + virtual void set_frma(SrsMp4OriginalFormatBox* v); + // Get the Scheme Type Box (schm) + virtual SrsMp4SchemeTypeBox* schm(); + virtual void set_schm(SrsMp4SchemeTypeBox* v); + // Get the Scheme Information Box (schi) + virtual SrsMp4SchemeInfoBox* schi(); + virtual void set_schi(SrsMp4SchemeInfoBox* v); +}; + +// Track Encryption box (tenc) +// @see ISO-IEC-23001-7.pdf 8.2 +// aligned(8) class TrackEncryptionBox extends FullBox('tenc', version, flags=0) { +// unsigned int(8) reserved 
= 0; +// if (version == 0) { +// unsigned int(8) reserved = 0; +// } else { // version is 1 or greater +// unsigned int(4) default_crypt_byte_block; +// unsigned int(4) default_skip_byte_block; +// } +// unsigned int(8) default_isProtected; +// unsigned int(8) default_Per_Sample_IV_Size; +// unsigned int(8)[16] default_KID; +// if (default_isProtected == 1 && default_Per_Sample_IV_Size == 0) { +// unsigned int(8) default_constant_IV_size; +// unsigned int(8)[default_constant_IV_size] default_constant_IV; +// } +// } +// @see https://developer.apple.com/documentation/http-live-streaming/about-the-common-media-application-format-with-http-live-streaming-hls +// For fragmented MPEG-4 Segments, an EXT-X-KEY tag with a METHOD=SAMPLE-AES attribute indicates that +// the Segment is encrypted using the `cbcs` scheme in ISO/IEC 23001-7. +// HLS supports unencrypted and encrypted with 'cbcs'. +// @see ISO-IEC-23001-7.pdf 10.4.1 Definition +// 'cbcs' AES-CBC subsample pattern encryption scheme. +// The 'scheme_type' field of the scheme Type Box('schm') SHALL be set to 'cbcs'. +// the version of the Track Encryption Box('tenc') SHALL be 1. +// Encrypted video tracks using NAL Structured Video conforming to ISO/IEC 14496-15 SHALL be +// protected using Subsample encryption specified in 9.5, and SHALL use pattern encryption as specified +// in 9.6. As a result, the fields crypt_byte_block and skip_byte_block SHALL NOT be 0. +// Constant IVs SHALL be used; 'default_Per_Sample_IV_Size' and 'Per_Sample_IV_Size', SHALL be 0. +// Tracks other than video are protected using whole-block full-sample encryption as specified in 9.7 and +// hence skip_byte_block SHALL be 0. +// Pattern Block length, i.e. crypt_byte_block + skip_byte_block SHOULD equal 10. +// For all video NAL units, including in 'avc1', the slice header SHALL be unencrypted. 
+// The first complete byte of video slice data(following the video slice header) SHALL begin a single +// Subsample protected byte range indicated by the start of BytesOfProtectedData, which extends to +// the end of the video NAL. +// NOTE 1 For AVC VCL NAL units, the encryption pattern starts at an offset rounded to the next byte after +// the slice header, i.e. on the first full byte of slice data. For HEVC, the encryption pattern starts after +// the byte_alignment() field that terminates the slice_segment_header(), i.e. on the first byte of slice data. +// +// @see ISO-IEC-23001-7.pdf 10.4.2 'cbcs' AES-CBC mode pattern encryption scheme application(informative) +// An encrypt:skip pattern of 1:9(i.e. 10% partial encryption) is recommended. Even though the syntax +// allows many different encryption patterns, a pattern of ten Blocks is recommended. This means that the +// skipped Blocks will be (10-N). The number of encrypted cipher blocks N can span multiple contiguous +// 16-byte Blocks(e.g. three encrypted Blocks followed by seven unencrypted Blocks would result in 30% +// partial encryption of the video data). +// For example, to achieve 10 % encryption, the first Block of the pattern is encrypted and the following +// nine Blocks are left unencrypted. The pattern is repeated every 160 bytes of the protected range, until +// the end of the range. If the protected range of the slice body is not a multiple of the pattern length +// (e.g. 160 bytes), then the pattern sequence applies to the included whole 16-byte Blocks and a partial +// 16-byte Block that may remain where the pattern is terminated by the byte length of the range +// BytesOfProtectedData, is left unencrypted. +// +// @see ISO-IEC-23001-7.pdf 9.7 Whole-block full sample encryption +// In whole-block full sample encryption, the entire sample is protected. 
Every sample is encrypted +// starting at offset 0(there is no unprotected preamble) up to the last 16-byte boundary, leaving any +// trailing 0-15 bytes in the clear. The IV is reset at every sample. +class SrsMp4TrackEncryptionBox : public SrsMp4FullBox +{ +public: + uint8_t reserved; + uint8_t reserved_2; + uint8_t default_crypt_byte_block; + uint8_t default_skip_byte_block; + uint8_t default_is_protected; + uint8_t default_per_sample_IV_size; + uint8_t default_KID[16]; + uint8_t default_constant_IV_size; + uint8_t default_constant_IV[16]; +public: + SrsMp4TrackEncryptionBox(); + virtual ~SrsMp4TrackEncryptionBox(); + +public: + virtual void set_default_constant_IV(uint8_t* iv, uint8_t iv_size); + +protected: + virtual int nb_header(); + virtual srs_error_t encode_header(SrsBuffer* buf); + virtual srs_error_t decode_header(SrsBuffer* buf); +public: + virtual std::stringstream& dumps_detail(std::stringstream& ss, SrsMp4DumpContext dc); +}; + +// TODO: add SchemeTypeBox(schm), set scheme_type=cbcs + // Generally, a MP4 sample contains a frame, for example, a video frame or audio frame. class SrsMp4Sample { @@ -1931,7 +2293,7 @@ class SrsMp4SampleManager virtual srs_error_t write(SrsMp4MovieBox* moov); // Write the samples info to moof. // @param The dts is the dts of last segment. - virtual srs_error_t write(SrsMp4MovieFragmentBox* moof, uint64_t dts); + virtual srs_error_t write(SrsMp4TrackFragmentBox* traf, uint64_t dts); private: virtual srs_error_t write_track(SrsFrameType track, SrsMp4DecodingTime2SampleBox* stts, SrsMp4SyncSampleBox* stss, SrsMp4CompositionTime2SampleBox* ctts, @@ -2111,22 +2473,67 @@ class SrsMp4Encoder }; // A fMP4 encoder, to write the init.mp4 with sequence header. +// TODO: What the M2ts short for? 
class SrsMp4M2tsInitEncoder { private: ISrsWriter* writer; + +private: + uint8_t crypt_byte_block_; + uint8_t skip_byte_block_; + unsigned char kid_[16]; + unsigned char iv_[16]; + uint8_t iv_size_; + bool is_protected_; + public: SrsMp4M2tsInitEncoder(); virtual ~SrsMp4M2tsInitEncoder(); public: // Initialize the encoder with a writer w. virtual srs_error_t initialize(ISrsWriter* w); + // set encryption + // TODO: review kid(map to a key) and iv, which are shared between audio/video tracks. + virtual void config_encryption(uint8_t crypt_byte_block, uint8_t skip_byte_block, unsigned char* kid, unsigned char* iv, uint8_t iv_size); // Write the sequence header. + // TODO: merge this method to its sibling. virtual srs_error_t write(SrsFormat* format, bool video, int tid); + + /** + * The mp4 box format for init.mp4. + * + * |ftyp| + * |moov| + * | |mvhd| + * | |trak| + * | |trak| + * | |....| + * | |mvex| + * | | |trex| + * | | |trex| + * | | |....| + * + * Write the sequence header with both video and audio track. + */ + virtual srs_error_t write(SrsFormat* format, int v_tid, int a_tid); + +private: + /** + * box->type = 'encv' or 'enca' + * |encv| + * | |sinf| + * | | |frma| + * | | |schm| + * | | |schi| + * | | | |tenc| + */ + virtual srs_error_t config_sample_description_encryption(SrsMp4SampleEntry* box); }; // A fMP4 encoder, to cache segments then flush to disk, because the fMP4 should write // trun box before mdat. +// TODO: fmp4 support package more than one tracks. class SrsMp4M2tsSegmentEncoder { private: @@ -2160,6 +2567,52 @@ class SrsMp4M2tsSegmentEncoder virtual srs_error_t flush(uint64_t& dts); }; +// A fMP4 encoder, to cache segments then flush to disk, because the fMP4 should write +// trun box before mdat. +// TODO: fmp4 support package more than one tracks. +class SrsFmp4SegmentEncoder +{ +private: + ISrsWriter* writer_; + uint32_t sequence_number_; + // TODO: audio, video may have different basetime. 
+ srs_utime_t decode_basetime_; + uint32_t audio_track_id_; + uint32_t video_track_id_; +private: + uint32_t nb_audios_; + uint32_t nb_videos_; + uint32_t styp_bytes_; + uint64_t mdat_audio_bytes_; + uint64_t mdat_video_bytes_; + SrsMp4SampleManager* audio_samples_; + SrsMp4SampleManager* video_samples_; + unsigned char* key_; + unsigned char iv_[16]; + bool do_sample_encryption_; +public: + SrsFmp4SegmentEncoder(); + virtual ~SrsFmp4SegmentEncoder(); +public: + // Initialize the encoder with a writer w. + virtual srs_error_t initialize(ISrsWriter* w, uint32_t sequence, srs_utime_t basetime, uint32_t v_tid, uint32_t a_tid); + // config cipher + virtual srs_error_t config_cipher(unsigned char* key, unsigned char* iv); + // Cache a sample. + // @param ht, The sample handler type, audio/soun or video/vide. + // @param ft, The frame type. For video, it's SrsVideoAvcFrameType. + // @param dts The output dts in milliseconds. + // @param pts The output pts in milliseconds. + // @param sample The output payload, user must free it. + // @param nb_sample The output size of payload. + // @remark All samples are RAW AAC/AVC data, because sequence header is writen to init.mp4. + virtual srs_error_t write_sample(SrsMp4HandlerType ht, uint16_t ft, + uint32_t dts, uint32_t pts, uint8_t* sample, uint32_t nb_sample); + // Flush the encoder, to write the moof and mdat. + virtual srs_error_t flush(uint64_t& dts); +}; + + // LCOV_EXCL_START ///////////////////////////////////////////////////////////////////////////////// // MP4 dumps functions. 
diff --git a/trunk/src/protocol/srs_protocol_http_stack.cpp b/trunk/src/protocol/srs_protocol_http_stack.cpp index b280fca387..c11e4eac3f 100644 --- a/trunk/src/protocol/srs_protocol_http_stack.cpp +++ b/trunk/src/protocol/srs_protocol_http_stack.cpp @@ -405,13 +405,15 @@ srs_error_t SrsHttpFileServer::serve_http(ISrsHttpResponseWriter* w, ISrsHttpMes // use vod stream for .flv/.fhv if (srs_string_ends_with(fullpath, ".flv") || srs_string_ends_with(fullpath, ".fhv")) { return serve_flv_file(w, r, fullpath); - } else if (srs_string_ends_with(fullpath, ".mp4")) { - return serve_mp4_file(w, r, fullpath); } else if (srs_string_ends_with(upath, ".m3u8")) { return serve_m3u8_file(w, r, fullpath); - } else if (srs_string_ends_with(upath, ".ts")) { + } else if (srs_string_ends_with(upath, ".ts") || + srs_string_ends_with(upath, ".m4s") || + srs_path_basename(upath) == "init.mp4") { return serve_ts_file(w, r, fullpath); - } + } else if (srs_string_ends_with(fullpath, ".mp4")) { + return serve_mp4_file(w, r, fullpath); + } // serve common static file. 
return serve_file(w, r, fullpath); diff --git a/trunk/src/protocol/srs_protocol_http_stack.hpp b/trunk/src/protocol/srs_protocol_http_stack.hpp index fcef24401d..90c5a0d09c 100644 --- a/trunk/src/protocol/srs_protocol_http_stack.hpp +++ b/trunk/src/protocol/srs_protocol_http_stack.hpp @@ -352,6 +352,7 @@ class SrsHttpFileServer : public ISrsHttpHandler virtual srs_error_t serve_flv_file(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); virtual srs_error_t serve_mp4_file(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); virtual srs_error_t serve_m3u8_file(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); + // the ts file including: .ts .m4s init.mp4 virtual srs_error_t serve_ts_file(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); protected: // When access flv file with x.flv?start=xxx @@ -371,6 +372,7 @@ class SrsHttpFileServer : public ISrsHttpHandler // Remark 2: // If use two same "hls_ctx" in different requests, SRS cannot detect so that they will be treated as one. virtual srs_error_t serve_m3u8_ctx(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); + // the ts file including: .ts .m4s init.mp4 virtual srs_error_t serve_ts_ctx(ISrsHttpResponseWriter* w, ISrsHttpMessage* r, std::string fullpath); protected: // Copy the fs to response writer in size bytes. 
diff --git a/trunk/src/utest/srs_utest_config.cpp b/trunk/src/utest/srs_utest_config.cpp index 8999dabfeb..fa78053ae3 100644 --- a/trunk/src/utest/srs_utest_config.cpp +++ b/trunk/src/utest/srs_utest_config.cpp @@ -3732,12 +3732,14 @@ VOID TEST(ConfigMainTest, CheckVhostConfig5) if (true) { MockSrsConfig conf; - HELPER_ASSERT_SUCCESS(conf.parse(_MIN_OK_CONF "vhost ossrs.net{hls{hls_keys on;hls_fragments_per_key 5;hls_key_file xxx;hls_key_file_path xxx2;hls_key_url xxx3;}}")); + HELPER_ASSERT_SUCCESS(conf.parse(_MIN_OK_CONF "vhost ossrs.net{hls{hls_keys on;hls_fragments_per_key 5;hls_key_file xxx;hls_key_file_path xxx2;hls_key_url xxx3;hls_use_fmp4 on;hls_fmp4_file xx.m4s;}}")); EXPECT_TRUE(conf.get_hls_keys("ossrs.net")); EXPECT_EQ(5, conf.get_hls_fragments_per_key("ossrs.net")); EXPECT_STREQ("xxx", conf.get_hls_key_file("ossrs.net").c_str()); EXPECT_STREQ("xxx2", conf.get_hls_key_file_path("ossrs.net").c_str()); EXPECT_STREQ("xxx3", conf.get_hls_key_url("ossrs.net").c_str()); + EXPECT_TRUE(conf.get_hls_use_fmp4("ossrs.net")); + EXPECT_STREQ("xx.m4s", conf.get_hls_fmp4_file("ossrs.net").c_str()); } if (true) { @@ -5046,6 +5048,18 @@ VOID TEST(ConfigEnvTest, CheckEnvValuesHls) SrsSetEnvConfig(hls_dts_directly, "SRS_VHOST_HLS_HLS_DTS_DIRECTLY", "off"); EXPECT_FALSE(conf.get_vhost_hls_dts_directly("__defaultVhost__")); + + SrsSetEnvConfig(hls_use_fmp4_on, "SRS_VHOST_HLS_HLS_USE_FMP4", "on"); + EXPECT_TRUE(conf.get_hls_use_fmp4("__defaultVhost__")); + + SrsSetEnvConfig(hls_use_fmp4_off, "SRS_VHOST_HLS_HLS_USE_FMP4", "off"); + EXPECT_FALSE(conf.get_hls_use_fmp4("__defaultVhost__")); + + SrsSetEnvConfig(hls_use_fmp4_unexpected, "SRS_VHOST_HLS_HLS_USE_FMP4", "xx"); + EXPECT_FALSE(conf.get_hls_use_fmp4("__defaultVhost__")); + + SrsSetEnvConfig(hls_fmp4_file, "SRS_VHOST_HLS_HLS_FMP4_FILE", "xxx.m4s"); + EXPECT_STREQ("xxx.m4s", conf.get_hls_fmp4_file("__defaultVhost__").c_str()); } } diff --git a/trunk/src/utest/srs_utest_mp4.cpp b/trunk/src/utest/srs_utest_mp4.cpp index 
77f9355fea..5499bb1fc0 100644 --- a/trunk/src/utest/srs_utest_mp4.cpp +++ b/trunk/src/utest/srs_utest_mp4.cpp @@ -898,11 +898,10 @@ VOID TEST(KernelMp4Test, TREXBox) } SrsMp4MovieExtendsBox box; - EXPECT_TRUE(NULL == box.trex()); SrsMp4TrackExtendsBox* trex = new SrsMp4TrackExtendsBox(); - box.set_trex(trex); - EXPECT_TRUE(trex == box.trex()); + box.add_trex(trex); + EXPECT_TRUE(trex == box.get(SrsMp4BoxTypeTREX)); } VOID TEST(KernelMp4Test, TKHDBox)