您好,很感谢官方能够支持这个跟踪的项目。我试了一下,单跑这个 yolov5 的例子时,因为检测目标数过多,推流的传输延迟有点大,而且板子的资源消耗程度很高;这种情况下如果再加模型可能会更加吃不消。我想尽可能减少检测目标数,并实现"只检测人、其他物体不予检测"的逻辑——这样能否有效提高推理和编码的速度?以下是我修改的代码:
Code: Select all
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/poll.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <vector>
#include <termios.h>
#include "rtsp_demo.h"
#include "sample_comm.h"
#include "yolov5.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
// disp size: resolution delivered by the VPSS channel and encoded by VENC
int width = 720;
int height = 480;
// model size: fixed input resolution of the YOLOv5 .rknn model
int model_width = 640;
int model_height = 640;
// Letterbox state, written by letterbox() each frame and read back by
// mapCoordinates() to project model-space boxes onto the camera frame.
float scale ;       // uniform resize factor (min of x/y scale)
int leftPadding ;   // black border added on the left of the 640x640 canvas
int topPadding ;    // black border added on the top of the 640x640 canvas
//handle while function
// Main-loop stop flag; set by sigint_handler() and by the UART "close" command.
static bool quit=false;
// Create and start one H.264 VENC channel fed with packed RGB888 frames.
//
// chnId        encoder channel id
// width/height encoded picture size
// enType       codec id (RK_VIDEO_ID_AVC in this demo)
// Returns 0 on success, the failing RKMPI error code otherwise.
//
// Fixes vs. the original:
//  * u32Gop was 1, i.e. EVERY frame was an intra frame. At a 3 Mbit/s CBR
//    budget that massively inflates per-frame size and is a primary cause of
//    the reported RTSP latency/stutter; a GOP of ~1-2 s of frames is the
//    conventional choice for streaming.
//  * The RK_MPI_VENC_CreateChn return code is now checked.
static RK_S32 test_venc_init(int chnId, int width, int height, RK_CODEC_ID_E enType) {
	printf("%s\n", __func__);
	VENC_RECV_PIC_PARAM_S stRecvParam;
	VENC_CHN_ATTR_S stAttr;
	memset(&stAttr, 0, sizeof(VENC_CHN_ATTR_S));
	// RTSP H264
	stAttr.stVencAttr.enType = enType;
	//stAttr.stVencAttr.enPixelFormat = RK_FMT_YUV420SP;
	// The VPSS channel outputs packed RGB888, so the encoder input must match.
	stAttr.stVencAttr.enPixelFormat = RK_FMT_RGB888;
	stAttr.stVencAttr.u32Profile = H264E_PROFILE_MAIN;
	stAttr.stVencAttr.u32PicWidth = width;
	stAttr.stVencAttr.u32PicHeight = height;
	stAttr.stVencAttr.u32VirWidth = width;
	stAttr.stVencAttr.u32VirHeight = height;
	stAttr.stVencAttr.u32StreamBufCnt = 2;
	// Output (compressed) stream buffer size; w*h*3/2 is ample for H.264.
	stAttr.stVencAttr.u32BufSize = width * height * 3 / 2;
	stAttr.stVencAttr.enMirror = MIRROR_NONE;
	stAttr.stRcAttr.enRcMode = VENC_RC_MODE_H264CBR;
	stAttr.stRcAttr.stH264Cbr.u32BitRate = 3 * 1024; // kbit/s
	// One I-frame every 30 frames instead of every frame (was u32Gop = 1).
	stAttr.stRcAttr.stH264Cbr.u32Gop = 30;
	RK_S32 s32Ret = RK_MPI_VENC_CreateChn(chnId, &stAttr);
	if (s32Ret != RK_SUCCESS) {
		printf("RK_MPI_VENC_CreateChn %x\n", s32Ret);
		return s32Ret;
	}
	memset(&stRecvParam, 0, sizeof(VENC_RECV_PIC_PARAM_S));
	stRecvParam.s32RecvPicNum = -1; // receive frames until explicitly stopped
	RK_MPI_VENC_StartRecvFrame(chnId, &stRecvParam);
	return 0;
}
// On this demo board the VI dev id is always 0; the VI node is selected via
// the channel. This routine configures dev 0 (if not yet configured),
// enables it, and binds it to pipe 0.
// Returns 0 on success, -1 on any RKMPI failure.
int vi_dev_init() {
	printf("%s\n", __func__);
	int ret = 0;
	int devId = 0;
	int pipeId = devId;
	VI_DEV_ATTR_S stDevAttr;
	VI_DEV_BIND_PIPE_S stBindPipe;
	memset(&stDevAttr, 0, sizeof(stDevAttr));
	memset(&stBindPipe, 0, sizeof(stBindPipe));
	// 0. get dev config status
	ret = RK_MPI_VI_GetDevAttr(devId, &stDevAttr);
	if (ret == RK_ERR_VI_NOT_CONFIG) {
		// 0-1.config dev
		ret = RK_MPI_VI_SetDevAttr(devId, &stDevAttr);
		if (ret != RK_SUCCESS) {
			printf("RK_MPI_VI_SetDevAttr %x\n", ret);
			return -1;
		}
	} else {
		printf("RK_MPI_VI_SetDevAttr already\n");
	}
	// 1.get dev enable status
	ret = RK_MPI_VI_GetDevIsEnable(devId);
	if (ret != RK_SUCCESS) {
		// 1-2.enable dev
		ret = RK_MPI_VI_EnableDev(devId);
		if (ret != RK_SUCCESS) {
			printf("RK_MPI_VI_EnableDev %x\n", ret);
			return -1;
		}
		// 1-3.bind dev/pipe
		// Bug fix: u32Num is the NUMBER of pipes to bind, not a pipe id.
		// The original wrote u32Num = pipeId (== 0), i.e. "bind zero pipes".
		stBindPipe.u32Num = 1;
		stBindPipe.PipeId[0] = pipeId;
		ret = RK_MPI_VI_SetDevBindPipe(devId, &stBindPipe);
		if (ret != RK_SUCCESS) {
			printf("RK_MPI_VI_SetDevBindPipe %x\n", ret);
			return -1;
		}
	} else {
		printf("RK_MPI_VI_EnableDev already\n");
	}
	return 0;
}
int vi_chn_init(int channelId, int width, int height) {
int ret;
int buf_cnt = 2;
// VI init
VI_CHN_ATTR_S vi_chn_attr;
memset(&vi_chn_attr, 0, sizeof(vi_chn_attr));
vi_chn_attr.stIspOpt.u32BufCount = buf_cnt;
vi_chn_attr.stIspOpt.enMemoryType =
VI_V4L2_MEMORY_TYPE_DMABUF; // VI_V4L2_MEMORY_TYPE_MMAP;
vi_chn_attr.stSize.u32Width = width;
vi_chn_attr.stSize.u32Height = height;
vi_chn_attr.enPixelFormat = RK_FMT_YUV420SP;
vi_chn_attr.enCompressMode = COMPRESS_MODE_NONE; // COMPRESS_AFBC_16x16;
vi_chn_attr.u32Depth = 2;
ret = RK_MPI_VI_SetChnAttr(0, channelId, &vi_chn_attr);
ret |= RK_MPI_VI_EnableChn(0, channelId);
if (ret) {
printf("ERROR: create VI error! ret=%d\n", ret);
return ret;
}
return ret;
}
// Create VPSS group 0 and one user-mode channel that converts the VI NV12
// stream to packed RGB888 at width x height (consumed by OpenCV and VENC).
// Returns RK_SUCCESS or the first failing RKMPI error code.
//
// Fix vs. the original: both attribute structs were used without being
// zeroed first, so all fields not explicitly assigned below were passed to
// the driver with indeterminate stack garbage.
int test_vpss_init(int VpssChn, int width, int height) {
	printf("%s\n", __func__);
	int s32Ret;
	VPSS_CHN_ATTR_S stVpssChnAttr;
	VPSS_GRP_ATTR_S stGrpVpssAttr;
	memset(&stVpssChnAttr, 0, sizeof(stVpssChnAttr));
	memset(&stGrpVpssAttr, 0, sizeof(stGrpVpssAttr));
	int s32Grp = 0;
	// group: pass-through NV12, no rate conversion, no compression
	stGrpVpssAttr.u32MaxW = 4096;
	stGrpVpssAttr.u32MaxH = 4096;
	stGrpVpssAttr.enPixelFormat = RK_FMT_YUV420SP;
	stGrpVpssAttr.stFrameRate.s32SrcFrameRate = -1;
	stGrpVpssAttr.stFrameRate.s32DstFrameRate = -1;
	stGrpVpssAttr.enCompressMode = COMPRESS_MODE_NONE;
	// channel: scale/convert to RGB888 at the display size
	stVpssChnAttr.enChnMode = VPSS_CHN_MODE_USER;
	stVpssChnAttr.enDynamicRange = DYNAMIC_RANGE_SDR8;
	stVpssChnAttr.enPixelFormat = RK_FMT_RGB888;
	stVpssChnAttr.stFrameRate.s32SrcFrameRate = -1;
	stVpssChnAttr.stFrameRate.s32DstFrameRate = -1;
	stVpssChnAttr.u32Width = width;
	stVpssChnAttr.u32Height = height;
	stVpssChnAttr.enCompressMode = COMPRESS_MODE_NONE;
	s32Ret = RK_MPI_VPSS_CreateGrp(s32Grp, &stGrpVpssAttr);
	if (s32Ret != RK_SUCCESS) {
		return s32Ret;
	}
	s32Ret = RK_MPI_VPSS_SetChnAttr(s32Grp, VpssChn, &stVpssChnAttr);
	if (s32Ret != RK_SUCCESS) {
		return s32Ret;
	}
	s32Ret = RK_MPI_VPSS_EnableChn(s32Grp, VpssChn);
	if (s32Ret != RK_SUCCESS) {
		return s32Ret;
	}
	s32Ret = RK_MPI_VPSS_StartGrp(s32Grp);
	if (s32Ret != RK_SUCCESS) {
		return s32Ret;
	}
	return s32Ret;
}
// Aspect-preserving resize of the camera frame onto a black
// model_width x model_height canvas ("letterbox"). Side effects: updates the
// globals scale/leftPadding/topPadding so mapCoordinates() can invert the
// transform for this frame.
cv::Mat letterbox(cv::Mat input)
{
	// choose the smaller scale so the whole frame fits inside the canvas
	float scaleX = (float)model_width / (float)width;   // 0.888 for 720
	float scaleY = (float)model_height / (float)height; // 1.333 for 480
	scale = scaleX < scaleY ? scaleX : scaleY;
	int inputWidth = (int)((float)width * scale);
	int inputHeight = (int)((float)height * scale);
	// center the resized image; the remainder becomes black padding
	leftPadding = (model_width - inputWidth) / 2;
	topPadding = (model_height - inputHeight) / 2;
	cv::Mat inputScale;
	cv::resize(input, inputScale, cv::Size(inputWidth, inputHeight), 0, 0, cv::INTER_LINEAR);
	// Bug fix: the canvas was hard-coded to 640x640; use the model dims so
	// this keeps working if model_width/model_height ever change.
	cv::Mat letterboxImage(model_height, model_width, CV_8UC3, cv::Scalar(0, 0, 0));
	cv::Rect roi(leftPadding, topPadding, inputWidth, inputHeight);
	inputScale.copyTo(letterboxImage(roi));
	return letterboxImage;
}
void mapCoordinates(int *x, int *y) {
int mx = *x - leftPadding;
int my = *y - topPadding;
*x = (int)((float)mx / scale);
*y = (int)((float)my / scale);
}
// Dispatch a command received over the UART:
//   "close"  -> stop the main loop (resources are released on the way out)
//   "reboot" -> reboot the board
//   "rkill"  -> kill the rkipc service
void rx_buffer_Handle(char *p)
{
	static const struct { const char *name; size_t len; int action; } kCmds[] = {
		{"close", 5, 0},
		{"reboot", 6, 1},
		{"rkill", 5, 2},
	};
	for (size_t i = 0; i < sizeof(kCmds) / sizeof(kCmds[0]); ++i) {
		if (strncmp(p, kCmds[i].name, kCmds[i].len) != 0)
			continue;
		switch (kCmds[i].action) {
		case 0:
			quit = true; // main loop exits cleanly
			break;
		case 1:
			system("reboot");
			break;
		case 2:
			system("killall rkipc");
			break;
		}
		return;
	}
}
// Shared runtime state (globals so the signal handler and UART thread can
// reach them).
rtsp_demo_handle g_rtsplive = NULL;   // RTSP server handle (port 554)
rknn_app_context_t rknn_app_ctx;      // YOLOv5 RKNN model context
MPP_CHN_S stSrcChn, stvpssChn;        // VI -> VPSS binding endpoints
VENC_STREAM_S stFrame;                // reused encoded-stream descriptor
int serial_fd;                        // UART fd; 0 until open() in main()
pthread_t Handle_thread;              // UART reader thread (User_Handle)
// SIGINT handler: request shutdown of the main loop and unblock the UART
// reader thread so pthread_join() in main() can complete.
//
// Fix vs. the original: fprintf/perror/exit are not async-signal-safe and
// exit(-1) skipped the entire MPI/ISP teardown at the end of main(); the
// handler now only sets the stop flag and cancels the blocked reader.
void sigint_handler(int sig){
	(void)sig;
	// main loop observes this and runs the normal release path
	quit = true;
	// break the UART thread out of its blocking read()
	pthread_cancel(Handle_thread);
}
// UART reader thread: block on read(), echo each received chunk back, and
// hand it to rx_buffer_Handle() for command dispatch. Runs until quit is
// set (or the thread is cancelled from sigint_handler()).
static void *User_Handle(void *arg){
	(void)arg;
	char rx_buffer[256];
	while(!quit){
		// Fix: read at most sizeof-1 bytes. The original read a full
		// sizeof(rx_buffer) and then wrote rx_buffer[bytes_read] = '\0',
		// which on a full read stores to index 256 -- a buffer overflow.
		int bytes_read = read(serial_fd, rx_buffer, sizeof(rx_buffer) - 1);
		if(bytes_read < 0){
			perror("receive error");
		}else{
			rx_buffer[bytes_read] = '\0';
			//printf("RX:%s\n",rx_buffer);
			write(serial_fd, rx_buffer, bytes_read); // echo back
			rx_buffer_Handle(rx_buffer);
		}
		memset(rx_buffer, 0, sizeof(rx_buffer));
	}
	// Fix: the original fell off the end of a non-void function (UB).
	return NULL;
}
// Pipeline: VI (NV12) -> VPSS (RGB888, 720x480) -> [OpenCV letterbox ->
// RKNN YOLOv5 -> draw person boxes] -> VENC (H.264 CBR) -> RTSP :554/live/0.
// Detections for class 0 (person) are also reported over UART /dev/ttyS3.
int main(int argc, char *argv[]) {
// NOTE(review): serial_fd is a global still holding 0 here -- the UART is
// only open()ed further below. These three lines therefore set O_NONBLOCK on
// fd 0 (stdin), NOT on the serial port. Move them after the open() call if
// non-blocking UART reads are intended.
int flags = fcntl(serial_fd, F_GETFL,0);
flags |= O_NONBLOCK;
fcntl(serial_fd, F_SETFL, flags);
RK_S32 s32Ret = 0;
int sX,sY,eX,eY;
// Rknn model
// NOTE(review): text holds "<class> <pct>%"; 16 bytes fits "person xx.x%"
// but would overflow for longer COCO class names. Only cls_id 0 is drawn
// today, so it works -- consider snprintf or a larger buffer.
char text[16];
object_detect_result_list od_results;
int ret; // NOTE(review): unused
const char *model_path = "./model/yolov5.rknn";
memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t));
init_yolov5_model(model_path, &rknn_app_ctx);
printf("init rknn model success!\n");
init_post_process();
//h264_frame
stFrame.pstPack = (VENC_PACK_S *)malloc(sizeof(VENC_PACK_S));
VIDEO_FRAME_INFO_S h264_frame; // NOTE(review): unused
VIDEO_FRAME_INFO_S stVpssFrame;
// rkaiq init
RK_BOOL multi_sensor = RK_FALSE;
const char *iq_dir = "/etc/iqfiles";
rk_aiq_working_mode_t hdr_mode = RK_AIQ_WORKING_MODE_NORMAL;
//hdr_mode = RK_AIQ_WORKING_MODE_ISP_HDR2;
SAMPLE_COMM_ISP_Init(0, hdr_mode, multi_sensor, iq_dir);
SAMPLE_COMM_ISP_Run(0);
// rkmpi init
if (RK_MPI_SYS_Init() != RK_SUCCESS) {
RK_LOGE("rk mpi sys init fail!");
return -1;
}
// rtsp init: serve the H.264 elementary stream on rtsp://<ip>:554/live/0
rtsp_session_handle g_rtsp_session;
g_rtsplive = create_rtsp_demo(554);
g_rtsp_session = rtsp_new_session(g_rtsplive, "/live/0");
rtsp_set_video(g_rtsp_session, RTSP_CODEC_ID_VIDEO_H264, NULL, 0);
rtsp_sync_video_ts(g_rtsp_session, rtsp_get_reltime(), rtsp_get_ntptime());
// vi init
vi_dev_init();
vi_chn_init(0, width, height);
// vpss init
test_vpss_init(0, width, height);
// bind vi to vpss: frames flow VI ch0 -> VPSS grp0 automatically
stSrcChn.enModId = RK_ID_VI;
stSrcChn.s32DevId = 0;
stSrcChn.s32ChnId = 0;
stvpssChn.enModId = RK_ID_VPSS;
stvpssChn.s32DevId = 0;
stvpssChn.s32ChnId = 0;
printf("====RK_MPI_SYS_Bind vi0 to vpss0====\n");
s32Ret = RK_MPI_SYS_Bind(&stSrcChn, &stvpssChn);
if (s32Ret != RK_SUCCESS) {
RK_LOGE("bind 0 ch venc failed");
return -1;
}
// venc init
RK_CODEC_ID_E enCodecType = RK_VIDEO_ID_AVC;
test_venc_init(0, width, height, enCodecType);
printf("venc init success\n");
//uart3 init: 9600 8N1, blocking reads (see NOTE above about O_NONBLOCK)
int serial_port_num; // NOTE(review): unused
const char serial_port[] = "/dev/ttyS3";
signal(SIGINT, sigint_handler);
serial_fd = open(serial_port, O_RDWR | O_NOCTTY);
if (serial_fd == -1) {
perror("Failed to open serial port");
return 1;
}
struct termios tty;
memset(&tty, 0, sizeof(tty));
if (tcgetattr(serial_fd, &tty) != 0) {
perror("Error from tcgetattr");
return 1;
}
cfsetospeed(&tty, B9600);
cfsetispeed(&tty, B9600);
tty.c_cflag &= ~PARENB; // no parity
tty.c_cflag &= ~CSTOPB; // 1 stop bit
tty.c_cflag &= ~CSIZE;
tty.c_cflag |= CS8;     // 8 data bits
if (tcsetattr(serial_fd, TCSANOW, &tty) != 0) {
perror("Error from tcsetattr");
return 1;
}
//init fork -- actually a thread, not a fork: UART reader
char tx_buffer[256];
pthread_create(&Handle_thread, NULL, User_Handle, NULL);
int j; // number of boxes drawn this frame (capped at 5 below)
while(!quit)
{
// get vpss frame (blocks until one is available)
s32Ret = RK_MPI_VPSS_GetChnFrame(0,0, &stVpssFrame,-1);
if(s32Ret == RK_SUCCESS)
{
void *data = RK_MPI_MB_Handle2VirAddr(stVpssFrame.stVFrame.pMbBlk);
//opencv
//get camera frame
// `frame` is a zero-copy view over the VPSS RGB888 buffer -- all cv::
// drawing below writes directly into the buffer that VENC will encode.
cv::Mat frame(height,width,CV_8UC3,data);
//cv::Mat frame640;
//cv::resize(frame, frame640, cv::Size(640,640), 0, 0, cv::INTER_LINEAR);
//letterbox
//change camera frame with black background and size 640x640
cv::Mat letterboxImage = letterbox(frame);
memcpy(rknn_app_ctx.input_mems[0]->virt_addr, letterboxImage.data, model_width*model_height*3);
// NOTE(review): inference runs on ALL classes; post-processing (NMS etc.)
// still pays for every detected object. To truly skip non-person targets,
// filter by class inside the post-process step (or retrain a 1-class
// model) -- the cls_id check below only limits drawing/serial output.
inference_yolov5_model(&rknn_app_ctx, &od_results);
j=0;
for(int i = 0; i < od_results.count; i++)
{
// fetch the four box coordinates
// cap the number of drawn/reported boxes at 5 per frame
if(j>=5){
break;
}
// NOTE(review): redundant -- the loop condition already guarantees count >= 1
if(od_results.count >= 1)
{
object_detect_result *det_result = &(od_results.results[i]);
/*printf("%s @ (%d %d %d %d) %.3f\n", coco_cls_to_name(det_result->cls_id),
det_result->box.left, det_result->box.top,
det_result->box.right, det_result->box.bottom,
det_result->prop);
*/
//frame is camera photo but det_result before mapCoordinates(&sX,&sY) is yolov5 result 640x640 location so use mapCoordinates(&sX,&sY) to mate input photo
//only person handle (COCO class 0 == "person")
if(det_result->cls_id==0)
{
sX = (int)(det_result->box.left );
sY = (int)(det_result->box.top );
eX = (int)(det_result->box.right );
eY = (int)(det_result->box.bottom );
// printf("$RV1106,%s,%d,%d,%.3f,%d,%d,%d,%d\n", coco_cls_to_name(det_result->cls_id),
// centerX,centerY,det_result->prop,
// det_result->box.left,det_result->box.right,
// det_result->box.top,det_result->box.bottom
// );
//send rknn result and send frame after treatment of input camera frame 720x480
// map letterbox-space coords back to the 720x480 frame
mapCoordinates(&sX,&sY);
mapCoordinates(&eX,&eY);
int centerX = (sX + eX) / 2;
int centerY = (sY + eY) / 2;
// report "$RV1106,<class>,<cx>,<cy>,<conf>,<l>,<r>,<t>,<b>" over UART
sprintf(tx_buffer,"$RV1106,%s,%d,%d,%.3f,%d,%d,%d,%d\n", coco_cls_to_name(det_result->cls_id),
centerX,centerY,det_result->prop,
sX,eX,
sY,eY);
write(serial_fd,tx_buffer,strlen(tx_buffer));
memset(tx_buffer,0,sizeof(tx_buffer));
cv::rectangle(frame,cv::Point(sX ,sY),
cv::Point(eX ,eY),
cv::Scalar(0,255,0),3);
sprintf(text, "%s %.1f%%", coco_cls_to_name(det_result->cls_id), det_result->prop * 100);
cv::putText(frame,text,cv::Point(sX, sY - 8),
cv::FONT_HERSHEY_SIMPLEX,1,
cv::Scalar(0,255,0),2);
//draw circle at the box center
cv::circle(frame, cv::Point(centerX, centerY), 5, cv::Scalar(0, 0, 255), -1);
j++;
}
}
}
// NOTE(review): frame was constructed over `data`, so frame.data == data
// and this memcpy copies the buffer onto itself -- it can be removed.
memcpy(data, frame.data, width * height * 3);
}
// send stream to venc to encode
// encode H264
RK_MPI_VENC_SendFrame(0, &stVpssFrame,-1);
// rtsp: fetch the encoded packet and push it to connected clients
s32Ret = RK_MPI_VENC_GetStream(0, &stFrame, -1);
if(s32Ret == RK_SUCCESS)
{
if(g_rtsplive && g_rtsp_session)
{
//printf("len = %d PTS = %d \n",stFrame.pstPack->u32Len, stFrame.pstPack->u64PTS);
//get venc encode stream
void *pData = RK_MPI_MB_Handle2VirAddr(stFrame.pstPack->pMbBlk);
rtsp_tx_video(g_rtsp_session, (uint8_t *)pData, stFrame.pstPack->u32Len,
stFrame.pstPack->u64PTS);
rtsp_do_event(g_rtsplive);
}
}
// release frame back to VPSS / VENC so the pipeline can reuse the buffers
s32Ret = RK_MPI_VPSS_ReleaseChnFrame(0, 0, &stVpssFrame);
if (s32Ret != RK_SUCCESS) {
RK_LOGE("RK_MPI_VI_ReleaseChnFrame fail %x", s32Ret);
}
s32Ret = RK_MPI_VENC_ReleaseStream(0, &stFrame);
if (s32Ret != RK_SUCCESS) {
RK_LOGE("RK_MPI_VENC_ReleaseStream fail %x", s32Ret);
}
// NOTE(review): clears only 8 of the 16 bytes; harmless since sprintf
// NUL-terminates, but sizeof(text) would state the intent.
memset(text,0,8);
}
//release thread (sigint_handler/quit ended it; join reclaims resources)
pthread_join(Handle_thread, NULL);
printf("exit result enter--------------------------------------------------------------------------------------------\n");
close(serial_fd);
// teardown in reverse order of creation: unbind, VI, VPSS, VENC, RTSP, ISP
RK_MPI_SYS_UnBind(&stSrcChn, &stvpssChn);
RK_MPI_VI_DisableChn(0, 0);
RK_MPI_VI_DisableDev(0);
RK_MPI_VPSS_StopGrp(0);
RK_MPI_VPSS_DestroyGrp(0);
RK_MPI_VENC_StopRecvFrame(0);
RK_MPI_VENC_DestroyChn(0);
free(stFrame.pstPack);
if (g_rtsplive)
rtsp_del_demo(g_rtsplive);
SAMPLE_COMM_ISP_Stop(0);
RK_MPI_SYS_Exit();
// Release rknn model
release_yolov5_model(&rknn_app_ctx);
deinit_post_process();
return 0;
}
虽然我用条件让它只在检测到人的时候进行 OpenCV 的处理,并且每帧最多画 5 个框,但画面还是很卡顿。我不确定是不是推理阶段检测到的物体太多也会耗费很多时间?需要修改哪里才能减少检测推理的目标种类,并提高 RTSP 推流的流畅度呢?另外,关于 ByteTrack 跟踪算法,如果不额外加模型的话,有没有可以参考、能够整合进这个 yolov5 demo 的网址或例子呢?