[英]How to set pts and dts of AVPacket from RTP timestamps while muxing VP8 RTP stream to webm using ffmpeg libavformat?
我正在使用ffmpeg libavformat庫編寫僅視頻的webm文件。 我在服務器上收到VP8編碼的rtp流。 我已經成功地將rtp字節流(來自rtp有效負載)分組為單獨的幀,並構造了AVPacket。 我沒有在這里將有效負載重新編碼為VP8,因為它已經被vp8編碼了。
我正在使用av_write_interleaved()方法將AVPacket寫入文件。 雖然我得到一個webm文件作為輸出,但它根本沒有播放。 當我使用mkv工具的'mkvinfo'命令檢查文件信息時,發現以下信息:
+ EBML head
|+ EBML version: 1
|+ EBML read version: 1
|+ EBML maximum ID length: 4
|+ EBML maximum size length: 8
|+ Doc type: webm
|+ Doc type version: 2
|+ Doc type read version: 2
+ Segment, size 2142500
|+ Seek head (subentries will be skipped)
|+ EbmlVoid (size: 170)
|+ Segment information
| + Timestamp scale: 1000000
| + Multiplexing application: Lavf58.0.100
| + Writing application: Lavf58.0.100
| + Duration: 78918744.480s (21921:52:24.480)
|+ Segment tracks
| + A track
| + Track number: 1 (track ID for mkvmerge & mkvextract: 0)
| + Track UID: 1
| + Lacing flag: 0
| + Name: Video Track
| + Language: eng
| + Codec ID: V_VP8
| + Track type: video
| + Default duration: 1.000ms (1000.000 frames/fields per second for a
video track)
| + Video track
| + Pixel width: 640
| + Pixel height: 480
|+ Tags
| + Tag
| + Targets
| + Simple
| + Name: ENCODER
| + String: Lavf58.0.100
| + Tag
| + Targets
| + TrackUID: 1
| + Simple
| + Name: DURATION
| + String: 21921:52:24.4800000
|+ Cluster
我們可以看到,流的持續時間非常高。 (我的有效流時長應該在8到10秒左右)。 而且,曲目信息中的幀頻也不是我要設置的。 我將幀頻設置為25 fps。
我正在應用av_scale_q(rtpTimeStamp,codec_timebase,stream_timebase)並將重新縮放的rtpTimeStamp設置為pts和dts值。 我的猜測是我設置pts和dts的方式是錯誤的。 請幫助我如何在AVPacket上設置pts和dts值,以獲取具有適當元信息的有效webm文件。
編輯:
以下是我調用該庫的代碼:
#define STREAM_FRAME_RATE 25
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
AVFrame *frame;
} OutputStream;
typedef struct WebMWriter {
OutputStream *audioStream, *videoStream;
AVFormatContext *ctx;
AVOutputFormat *outfmt;
AVCodec *audioCodec, *videoCodec;
} WebMWriter;
static OutputStream audioStream = { 0 }, videoStream = { 0 };
WebMWriter *init(char *filename)
{
av_register_all();
AVFormatContext *ctx = NULL;
AVCodec *audioCodec = NULL, *videoCodec = NULL;
const char *fmt_name = NULL;
const char *file_name = filename;
int alloc_status = avformat_alloc_output_context2(&ctx, NULL, fmt_name, file_name);
if(!ctx)
return NULL;
AVOutputFormat *fmt = (*ctx).oformat;
AVDictionary *video_opt = NULL;
av_dict_set(&video_opt, "language", "eng", 0);
av_dict_set(&video_opt, "title", "Video Track", 0);
if(fmt->video_codec != AV_CODEC_ID_NONE)
{
addStream(&videoStream, ctx, &videoCodec, AV_CODEC_ID_VP8, video_opt);
}
if(videoStream.st)
openVideo1(&videoStream, videoCodec, NULL);
av_dump_format(ctx, 0, file_name, 1);
int ret = -1;
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ctx->pb, file_name, AVIO_FLAG_WRITE);
if (ret < 0) {
printf("Could not open '%s': %s\n", file_name, av_err2str(ret));
return NULL;
}
}
/* Write the stream header, if any. */
AVDictionary *format_opt = NULL;
ret = avformat_write_header(ctx, &format_opt);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
return NULL;
}
WebMWriter *webmWriter = malloc(sizeof(struct WebMWriter));
webmWriter->ctx = ctx;
webmWriter->outfmt = fmt;
webmWriter->audioStream = &audioStream;
webmWriter->videoStream = &videoStream;
webmWriter->videoCodec = videoCodec;
return webmWriter;
}
以下是openVideo()方法:
void openVideo1(OutputStream *out_st, AVCodec *codec, AVDictionary *opt_arg)
{
AVCodecContext *codec_ctx = out_st->enc;
int ret = -1;
AVDictionary *opt = NULL;
if(opt_arg != NULL)
{
av_dict_copy(&opt, opt_arg, 0);
ret = avcodec_open2(codec_ctx, codec, &opt);
}
else
{
ret = avcodec_open2(codec_ctx, codec, NULL);
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(out_st->st->codecpar, codec_ctx);
if (ret < 0) {
printf("Could not copy the stream parameters\n");
exit(1);
}
}
以下是addStream()方法:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
(*cdc) = avcodec_find_encoder(codecId);
if(!(*cdc)) {
exit(1);
}
/*as we are passing a NULL AVCodec cdc, So AVCodecContext codec_ctx will not be allocated, we have to do it explicitly */
AVStream *st = avformat_new_stream(ctx, *cdc);
if(!st) {
exit(1);
}
out_st->st = st;
st->id = ctx->nb_streams-1;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
st->metadata = opt;
AVCodecContext *codec_ctx = st->codec;
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
out_st->enc = codec_ctx;
codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
switch ((*cdc)->type) {
case AVMEDIA_TYPE_AUDIO:
codec_ctx->codec_id = codecId;
codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
codec_ctx->bit_rate = 64000;
codec_ctx->sample_rate = 48000;
codec_ctx->channels = 2;//1;
codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
break;
case AVMEDIA_TYPE_VIDEO:
codec_ctx->codec_id = codecId;
codec_ctx->bit_rate = 90000;
codec_ctx->width = 640;
codec_ctx->height = 480;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
codec_ctx->gop_size = 12;
codec_ctx->pix_fmt = STREAM_PIX_FMT;
codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
break;
default:
break;
}
/* Some formats want stream headers to be separate. */
if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
以下是我調用的將數據幀寫入文件的代碼:
int writeVideoStream(AVFormatContext *ctx, AVStream *st, uint8_t *data, int size, long frameTimeStamp, int isKeyFrame, AVCodecContext *codec_ctx)
{
AVRational rat = st->time_base;
AVPacket pkt = {0};
av_init_packet(&pkt);
void *opaque = NULL;
int flags = AV_BUFFER_FLAG_READONLY;
AVBufferRef *bufferRef = av_buffer_create(data, size, NULL, opaque, flags);
pkt.buf = bufferRef;
pkt.data = data;
pkt.size = size;
pkt.stream_index = st->index;
pkt.pts = pkt.dts = frameTimeStamp;
pkt.pts = av_rescale_q(pkt.pts, codec_ctx->time_base, st->time_base);
pkt.dts = av_rescale_q(pkt.dts, codec_ctx->time_base, st->time_base);
if(isKeyFrame == 1)
pkt.flags |= AV_PKT_FLAG_KEY;
int ret = av_interleaved_write_frame(ctx, &pkt);
return ret;
}
注意:這里的“ frameTimeStamp”是該幀的rtp數據包上的rtp timeStamp。
編輯2.0:
我使用codecpars更新的addStream()方法更改了:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
(*cdc) = avcodec_find_encoder(codecId);
if(!(*cdc)) {
printf("@@@@@ couldnt find codec \n");
exit(1);
}
AVStream *st = avformat_new_stream(ctx, *cdc);
if(!st) {
printf("@@@@@ couldnt init stream\n");
exit(1);
}
out_st->st = st;
st->id = ctx->nb_streams-1;
AVCodecParameters *codecpars = st->codecpar;
codecpars->codec_id = codecId;
codecpars->codec_type = (*cdc)->type;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
st->metadata = opt;
//av_dict_free(&opt);
AVCodecContext *codec_ctx = st->codec;
if (!codec_ctx) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
out_st->enc = codec_ctx;
//since opus is experimental codec
//codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
switch ((*cdc)->type) {
case AVMEDIA_TYPE_AUDIO:
codec_ctx->codec_id = codecId;
codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;//AV_SAMPLE_FMT_U8 or AV_SAMPLE_FMT_S16;
codec_ctx->bit_rate = 64000;
codec_ctx->sample_rate = 48000;
codec_ctx->channels = 2;//1;
codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO; //AV_CH_LAYOUT_MONO;
codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
codecpars->format = codec_ctx->sample_fmt;
codecpars->channels = codec_ctx->channels;
codecpars->sample_rate = codec_ctx->sample_rate;
break;
case AVMEDIA_TYPE_VIDEO:
codec_ctx->codec_id = codecId;
codec_ctx->bit_rate = 90000;
codec_ctx->width = 640;
codec_ctx->height = 480;
codec_ctx->time_base = (AVRational){1,STREAM_FRAME_RATE};
codec_ctx->gop_size = 12;
codec_ctx->pix_fmt = STREAM_PIX_FMT;
//codec_ctx->max_b_frames = 1;
codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
codec_ctx->framerate = av_inv_q(codec_ctx->time_base);
st->avg_frame_rate = codec_ctx->framerate;//(AVRational){25000, 1000};
codecpars->format = codec_ctx->pix_fmt;
codecpars->width = codec_ctx->width;
codecpars->height = codec_ctx->height;
codecpars->sample_aspect_ratio = (AVRational){codec_ctx->width, codec_ctx->height};
break;
default:
break;
}
codecpars->bit_rate = codec_ctx->bit_rate;
int ret = avcodec_parameters_to_context(codec_ctx, codecpars);
if (ret < 0) {
printf("Could not copy the stream parameters\n");
exit(1);
}
/* Some formats want stream headers to be separate. */
if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
我認為您對計算pts / dts是正確的問題,請使用此公式手動計算時間戳,看看是否av_rescale_q
,然后可以使用av_rescale_q
。
這是我測試過的公式(用於原始(yuv)輸出):
int64_t frameTime;
int64_t frameDuration;
frameDuration = video_st->time_base.den / video_fps; // i.e. 25
frameTime = frame_count * frameDuration;
pkt->pts = frameTime / video_st->time_base.num;
pkt->duration = frameDuration;
pkt->dts = pkt->pts;
pkt->stream_index = video_st->index;
在av_interleaved_write_frame
之前使用它。
注意:這里的frame_count
是一個計數器,該計數器在每個視頻幀輸出(帶有av_interleaved_write_frame)之后增加。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.