Skip to content

Commit

Permalink
Merge pull request #7 from OpenHD/consti-dev
Browse files Browse the repository at this point in the history
Add X20 autodetection
  • Loading branch information
Consti10 authored Apr 19, 2024
2 parents e643408 + e466c05 commit 83c008c
Show file tree
Hide file tree
Showing 12 changed files with 1,011 additions and 13 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ set(SOURCE_FILES drm.c
gstrtpreceiver.h
#
assembly/memcpymove-v7l.S
parse_x20_util.h
)

include_directories("/usr/include/libdrm" "/usr/include/cairo" )
Expand Down
42 changes: 40 additions & 2 deletions copy_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <assert.h>
#include <string.h>


//#define __ARM__
/*#include <arm_neon.h>
void memcpy_neon_8bytes(uint8_t* region2, const uint8_t* region1, size_t length){
Expand Down Expand Up @@ -47,6 +47,42 @@ void memcpy_neon_aligned(void* dst, const void * src, size_t length){
}
}*/

// From https://stackoverflow.com/questions/34888683/arm-neon-memcpy-optimized-for-uncached-memory
// and https://stackoverflow.com/questions/61210517/memcpy-for-arm-uncached-memory-for-arm64
#ifdef __ARM__
void my_copy(volatile void *dst, volatile const void *src, int sz){
if (sz & 63) {
sz = (sz & -64) + 64;
}
asm volatile ("NEONCopyPLD: \n"
"sub %[dst], %[dst], #64 \n"
"1: \n"
"ldnp q0, q1, [%[src]] \n"
"ldnp q2, q3, [%[src], #32] \n"
"add %[dst], %[dst], #64 \n"
"subs %[sz], %[sz], #64 \n"
"add %[src], %[src], #64 \n"
"stnp q0, q1, [%[dst]] \n"
"stnp q2, q3, [%[dst], #32] \n"
"b.gt 1b \n"
: [dst]"+r"(dst), [src]"+r"(src), [sz]"+r"(sz) : : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
}
// https://wx.comake.online/doc/doc/SigmaStarDocs-SSC9341_Ispahan-ULS00V040-20210913/customer/faq/i6b0/system/i6b0/neon.html
// https://community.nxp.com/t5/i-MX-Processors/iMX6-EIM-transfer-speed-with-using-NEON-vld-vst-instructions/m-p/312256
/*void __attribute__ ((noinline)) memcpy_neon_pld(void *dest, const void *src, size_t n)
{
asm(
"NEONCopyPLD:\n"
" pld [r1, #0xC0]\n" //预取数据
" vldm r1!,{d0-d7}\n" //从参数一r0(src)加载8*8=64个单通道8位数据
" vstm r0!,{d0-d7}\n" //存储在目的地址r1(dst)中,同样是64个8位单通道8位数据
" subs r2,r2,#0x40\n" //循环跳转参数,每次减64,总共循环次数=row*col*4/64
" bgt NEONCopyPLD\n" //以前这里是bge,有问题。现在改成bgt。
);
}*/
#endif


#ifdef __ARM__
extern "C"{
// The memcpymove-v7l.S impl
Expand Down Expand Up @@ -74,7 +110,9 @@ struct memcpy_args_t {
void* memcpy_data_function(void* args_uncast){
struct memcpy_args_t* args=(struct memcpy_args_t*)args_uncast;
#ifdef __ARM__
mempcpy(args->dst,args->src,args->len);
//mempcpy(args->dst,args->src,args->len);
my_copy(args->dst,args->src,args->len);
//memcpy_neon_pld(args->dst,args->src,args->len);
#else
memcpy(args->dst,args->src,args->len);
#endif
Expand Down
2 changes: 1 addition & 1 deletion gstrtpreceiver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ namespace pipeline{
if(codec==VideoCodec::H264){
std::stringstream ss;
ss<<"video/x-h264";
ss<<", stream-format=\"byte-stream\",alignment=au";
ss<<", stream-format=\"byte-stream\",alignment=nal";
//ss<<", alignment=\"nal\"";
ss<<" ! ";
return ss.str();
Expand Down
66 changes: 57 additions & 9 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ extern "C" {
#ifdef __cplusplus
#include "gstrtpreceiver.h"
#include "SchedulingHelper.hpp"
#include "parse_x20_util.h"
#endif

// This buffer size has no effect on the latency -
Expand Down Expand Up @@ -96,7 +97,8 @@ int video_zpos = 1;
int develop_rendering_mode=0;
bool decode_h265=false;
int gst_udp_port=-1;
bool x20_apply_fixes=false;
bool x20_force=false;
bool x20_auto=false;
struct TSAccumulator m_decoding_latency;
// NOTE: Does not track latency to end completely
struct TSAccumulator m_decode_and_handover_display_latency;
Expand Down Expand Up @@ -153,7 +155,7 @@ void map_copy_unmap(int fd_src,int fd_dst,int memory_size){
uint64_t before_memcpy=get_time_ms();
//memcpy_threaded(test_buffer,src_p,memory_size,3);
//memcpy_threaded(dst_p,test_buffer,memory_size,3);
memcpy_threaded(dst_p,src_p,memory_size,3);
memcpy_threaded(dst_p,src_p,memory_size,2);

end_sync(fd_src,false);
end_sync(fd_dst,true);
Expand Down Expand Up @@ -879,15 +881,15 @@ bool feed_packet_to_decoder(MppPacket *packet,void* data_p,int data_len){
decoder_stalled_count++;
printf("Cannot feed decoder, stalled %d ?\n",decoder_stalled_count);
return false;
break;
}
usleep(2 * 1000);
}
return true;
}

void configure_x20(MppPacket *packet){
FILE *fp = fopen("/usr/local/bin/Header.h264", "rb");
printf("Applying x20 hack\n");
FILE *fp = fopen("/usr/local/bin/x20_header.h264", "rb");
assert(fp);
fseek(fp, 0L, SEEK_END);
long size = ftell(fp);
Expand All @@ -900,6 +902,8 @@ void configure_x20(MppPacket *packet){
feed_packet_to_decoder(packet,tmp_data.data(),size);
}

uint64_t first_frame_ms=0;
bool air_unit_discovery_finished= false;
void read_gstreamerpipe_stream(MppPacket *packet){
assert(gst_udp_port!=-1);
GstRtpReceiver receiver{gst_udp_port,decode_h265 ? 1 : 0};
Expand All @@ -911,10 +915,40 @@ void read_gstreamerpipe_stream(MppPacket *packet){
SchedulingHelper::set_thread_params_max_realtime("DisplayThread",SchedulingHelper::PRIORITY_REALTIME_LOW);
first= false;
}
if(!x20_force && x20_auto){
// X20 auto detection
if(!air_unit_discovery_finished){
const int x20_check=check_for_x20(frame->data(),frame->size());
if(x20_check==1){
// We have an x20
configure_x20(packet);
air_unit_discovery_finished= true;
}else if(x20_check==2){
// We have no x20 (definitely)
air_unit_discovery_finished= true;
}else{
// Unknown if x20 or not
// As a bup, we assume no x20 after X seconds
if(first_frame_ms==0){
first_frame_ms=get_time_ms();
return ;
}else{
const auto elapsed=get_time_ms()-first_frame_ms;
if(elapsed>5*1000){
// Assume no x20
printf("X20 or not unknown for > 5 seconds\n");
air_unit_discovery_finished= true;
}else{
// Skip this frame
return ;
}
}
}
}
}
feed_packet_to_decoder(packet,frame->data(),frame->size());
};
if(x20_apply_fixes){
printf("Applying x20 hack\n");
if(x20_force){
configure_x20(packet);
}
receiver.start_receiving(cb);
Expand Down Expand Up @@ -1012,7 +1046,10 @@ void printHelp() {
"\n"
" --rmode - different rendering modes for development \n"
"\n"
" --x20 - specific x20 fixe(s) \n"
" --x20-force - forces specific x20 fixe(s) (no autodetect), only works with x20\n"
"\n"
" --x20-auto - auto detect x20 or not as air, works with x20 AND rpi\n"
"\n"
"\n", __DATE__
);
}
Expand Down Expand Up @@ -1155,14 +1192,25 @@ int main(int argc, char **argv)
develop_rendering_mode= atoi((char*)mode);
continue;
}
__OnArgument("--x20") {
__OnArgument("--x20-force") {
const char* mode = __ArgValue;
x20_force= true;
continue;
}
__OnArgument("--x20-auto") {
const char* mode = __ArgValue;
x20_apply_fixes= true;
x20_auto= true;
continue;
}

__EndParseConsoleArguments__

// X20 force and x20 auto are exclusive
if(x20_auto && x20_force){
printf("Cannot use x20 auto and force at the same time\n");
assert(false);
}

if (enable_osd == 0 ) {
video_zpos = 4;
}
Expand Down
110 changes: 110 additions & 0 deletions nalu/CodecConfigFinder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
//
// Created by geier on 07/02/2020.
//

#ifndef LIVEVIDEO10MS_KEYFRAMEFINDER_HPP
#define LIVEVIDEO10MS_KEYFRAMEFINDER_HPP

#include <memory>
#include <vector>

#include "NALU.hpp"
// #include <qdebug.h>
#include <array>

// Takes a continuous stream of NALUs and save SPS / PPS data
// For later use
class CodecConfigFinder {
private:
std::unique_ptr<NALUBuffer> SPS = nullptr;
std::unique_ptr<NALUBuffer> PPS = nullptr;
// VPS are only used in H265
std::unique_ptr<NALUBuffer> VPS = nullptr;

public:
bool save_if_config(const NALU& nalu) {
if (nalu.getSize() <= 0) return false;
if (nalu.isSPS()) {
SPS = std::make_unique<NALUBuffer>(nalu);
// qDebug()<<"SPS found";
// qDebug()<<nalu.get_sps_as_string().c_str();
return true;
} else if (nalu.isPPS()) {
PPS = std::make_unique<NALUBuffer>(nalu);
// qDebug()<<"PPS found";
return true;
} else if (nalu.IS_H265_PACKET && nalu.isVPS()) {
VPS = std::make_unique<NALUBuffer>(nalu);
// qDebug()<<"VPS found";
return true;
}
// qDebug()<<"not a keyframe"<<(int)nalu.getDataWithoutPrefix()[0];
return false;
}
// H264 needs sps and pps
// H265 needs sps,pps and vps
bool all_config_available(const bool IS_H265 = false) {
if (IS_H265) {
return SPS != nullptr && PPS != nullptr && VPS != nullptr;
}
return SPS != nullptr && PPS != nullptr;
}
std::shared_ptr<std::vector<uint8_t>> get_config_data(
const bool IS_H265 = false) {
assert(all_config_available(IS_H265));
if (IS_H265) {
// Looks like avcodec wants the VPS before sps and pps
auto& sps = SPS->get_nal();
auto& pps = PPS->get_nal();
auto& vps = VPS->get_nal();
const auto size = sps.getSize() + pps.getSize() + vps.getSize();
auto ret = std::make_unique<std::vector<uint8_t>>(size);
std::memcpy(ret->data(), vps.getData(), vps.getSize());
auto offset = vps.getSize();
std::memcpy(ret->data() + offset, sps.getData(), sps.getSize());
offset += sps.getSize();
std::memcpy(ret->data() + offset, pps.getData(), pps.getSize());
return ret;
}
auto& sps = SPS->get_nal();
auto& pps = PPS->get_nal();
const auto size = sps.getSize() + pps.getSize();
auto ret = std::make_shared<std::vector<uint8_t>>(size);
std::memcpy(ret->data(), sps.getData(), sps.getSize());
std::memcpy(ret->data() + sps.getSize(), pps.getData(), pps.getSize());
return ret;
}
// returns false if the config data (SPS,PPS,optional VPS) has changed
// true otherwise
bool check_is_still_same_config_data(const NALU& nalu) {
assert(all_config_available(nalu.IS_H265_PACKET));
if (nalu.isSPS()) {
return compare(nalu, SPS->get_nal());
} else if (nalu.isPPS()) {
return compare(nalu, PPS->get_nal());
} else if (nalu.IS_H265_PACKET && nalu.isVPS()) {
return compare(nalu, VPS->get_nal());
}
return true;
}
static void appendNaluData(std::vector<uint8_t>& buff, const NALU& nalu) {
buff.insert(buff.begin(), nalu.getData(), nalu.getData() + nalu.getSize());
}
void reset() {
SPS = nullptr;
PPS = nullptr;
VPS = nullptr;
}
const NALU& getCSD0() const { return SPS->get_nal(); }
const NALU& getCSD1() const { return PPS->get_nal(); }
const NALU& getVPS() const { return VPS->get_nal(); }

public:
static bool compare(const NALU& n1, const NALU& n2) {
if (n1.getSize() != n2.getSize()) return false;
const int res = std::memcmp(n1.getData(), n2.getData(), n1.getSize());
return res == 0;
}
};

#endif // LIVEVIDEO10MS_KEYFRAMEFINDER_HPP
Loading

0 comments on commit 83c008c

Please sign in to comment.