Record RTSP stream with FFmpeg libavformat

Question

I'm trying to record RTSP stream from Axis camera with FFmpeg libavformat. I can grab video from files and then save it to another file, this is OK. But camera sends strange data, FPS is 100 and camera sends every 4th frame so result FPS is about 25. But libavformat set packets dts/pts for 90000 fps (default?) and new file stream has 100fps. Result is one hour video with only 100 frames.

Here is my code

#include <stdio.h>
#include <stdlib.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>


int main(int argc, char** argv) {

    AVFormatContext* context = avformat_alloc_context();
    int video_stream_index;

    av_register_all();
    avcodec_register_all();
    avformat_network_init();

    //open rtsp
    if(avformat_open_input(&context, "rtsp://195.200.199.8/mpeg4/media.amp",NULL,NULL) != 0){
        return EXIT_FAILURE;
    }

    if(avformat_find_stream_info(context,NULL) < 0){
        return EXIT_FAILURE;
    }

    //search video stream
    for(int i =0;i<context->nb_streams;i++){
        if(context->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
            video_stream_index = i;
    }

    AVPacket packet;
    av_init_packet(&packet);

    //open output file
    AVOutputFormat* fmt = av_guess_format(NULL,"test2.avi",NULL);
    AVFormatContext* oc = avformat_alloc_context();
    oc->oformat = fmt;
    avio_open2(&oc->pb, "test.avi", AVIO_FLAG_WRITE,NULL,NULL);

    AVStream* stream=NULL;
    int cnt = 0;
    //start reading packets from stream and write them to file

    av_read_play(context);//play RTSP
    while(av_read_frame(context,&packet)>=0 && cnt <100){//read 100 frames
        if(packet.stream_index == video_stream_index){//packet is video               
            if(stream == NULL){//create stream in file
                stream = avformat_new_stream(oc,context->streams[video_stream_index]->codec->codec);
                avcodec_copy_context(stream->codec,context->streams[video_stream_index]->codec);
                stream->sample_aspect_ratio = context->streams[video_stream_index]->codec->sample_aspect_ratio;
                avformat_write_header(oc,NULL);
            }
            packet.stream_index = stream->id;

            av_write_frame(oc,&packet);
            cnt++;
        }
        av_free_packet(&packet);
        av_init_packet(&packet);
    }
    av_read_pause(context);
    av_write_trailer(oc);
    avio_close(oc->pb);
    avformat_free_context(oc);

    return (EXIT_SUCCESS);
}

Result file is here: http://dl.dropbox.com/u/1243577/test.avi

Thanks for any advice

Answer 1

Here's how I do it. What I found was when receiving H264 the framerate in the stream is not correct. It sends 1/90000 Timebase. I skip initializing the new stream from the incoming stream and just copy certain parameters. The incoming r_frame_rate should be accurate if max_analyze_frames works correctly.

#include <stdio.h>
#include <stdlib.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <sys/time.h>

time_t get_time()
{
  struct timeval tv;

  gettimeofday( &tv, NULL );

  return tv.tv_sec; 
} 

int main( int argc, char* argv[] )
{
  AVFormatContext *ifcx = NULL;
  AVInputFormat *ifmt;
  AVCodecContext *iccx;
  AVCodec *icodec;
  AVStream *ist;
  int i_index;
  time_t timenow, timestart;
  int got_key_frame = 0;

  AVFormatContext *ofcx;
  AVOutputFormat *ofmt;
  AVCodecContext *occx;
  AVCodec *ocodec;
  AVStream *ost;
  int o_index;

  AVPacket pkt;

  int ix;

  const char *sProg = argv[ 0 ];
  const char *sFileInput;
  const char *sFileOutput;
  int bRunTime;

  if ( argc != 4 ) {
    printf( "Usage: %s url outfile runtime\n", sProg );
    return EXIT_FAILURE;
  } 
  sFileInput = argv[ 1 ];
  sFileOutput = argv[ 2 ];
  bRunTime = atoi( argv[ 3 ] );

  // Initialize library
  av_log_set_level( AV_LOG_DEBUG );
  av_register_all();
  avcodec_register_all(); 
  avformat_network_init();

  //
  // Input
  //

  //open rtsp
  if ( avformat_open_input( &ifcx, sFileInput, NULL, NULL) != 0 ) {
    printf( "ERROR: Cannot open input file\n" );
    return EXIT_FAILURE;
  }

  if ( avformat_find_stream_info( ifcx, NULL ) < 0 ) {
    printf( "ERROR: Cannot find stream info\n" );
    avformat_close_input( &ifcx );
    return EXIT_FAILURE;
  }

  snprintf( ifcx->filename, sizeof( ifcx->filename ), "%s", sFileInput );

  //search video stream
  i_index = -1;
  for ( ix = 0; ix < ifcx->nb_streams; ix++ ) {
    iccx = ifcx->streams[ ix ]->codec;
    if ( iccx->codec_type == AVMEDIA_TYPE_VIDEO ) {
      ist = ifcx->streams[ ix ];
      i_index = ix;
      break;
    }
  }
  if ( i_index < 0 ) {
    printf( "ERROR: Cannot find input video stream\n" );
    avformat_close_input( &ifcx );
    return EXIT_FAILURE;
  }

  //
  // Output
  //

  //open output file
  ofmt = av_guess_format( NULL, sFileOutput, NULL );
  ofcx = avformat_alloc_context();
  ofcx->oformat = ofmt;
  avio_open2( &ofcx->pb, sFileOutput, AVIO_FLAG_WRITE, NULL, NULL );

  // Create output stream
  //ost = avformat_new_stream( ofcx, (AVCodec *) iccx->codec );
  ost = avformat_new_stream( ofcx, NULL );
  avcodec_copy_context( ost->codec, iccx );

  ost->sample_aspect_ratio.num = iccx->sample_aspect_ratio.num;
  ost->sample_aspect_ratio.den = iccx->sample_aspect_ratio.den;

  // Assume r_frame_rate is accurate
  ost->r_frame_rate = ist->r_frame_rate;
  ost->avg_frame_rate = ost->r_frame_rate;
  ost->time_base = av_inv_q( ost->r_frame_rate );
  ost->codec->time_base = ost->time_base;

  avformat_write_header( ofcx, NULL );

  snprintf( ofcx->filename, sizeof( ofcx->filename ), "%s", sFileOutput );

  //start reading packets from stream and write them to file

  av_dump_format( ifcx, 0, ifcx->filename, 0 );
  av_dump_format( ofcx, 0, ofcx->filename, 1 );

  timestart = timenow = get_time();

  ix = 0;
  //av_read_play(context);//play RTSP (Shouldn't need this since it defaults to playing on connect)
  av_init_packet( &pkt );
  while ( av_read_frame( ifcx, &pkt ) >= 0 && timenow - timestart <= bRunTime ) {
    if ( pkt.stream_index == i_index ) { //packet is video               
      // Make sure we start on a key frame
      if ( timestart == timenow && ! ( pkt.flags & AV_PKT_FLAG_KEY ) ) {
        timestart = timenow = get_time();
        continue;
      }
      got_key_frame = 1;

      pkt.stream_index = ost->id;

      pkt.pts = ix++;
      pkt.dts = pkt.pts;

      av_interleaved_write_frame( ofcx, &pkt );
    }
    av_free_packet( &pkt );
    av_init_packet( &pkt );

    timenow = get_time();
  }
  av_read_pause( ifcx );
  av_write_trailer( ofcx );
  avio_close( ofcx->pb );
  avformat_free_context( ofcx );

  avformat_network_deinit();

  return EXIT_SUCCESS;
}

Answer 2

I don't think you're supposed to just increment the PTS value like that. It might work in rare occasions where the time base is just right, but for the general case it won't work.

You should change this:

pkt.pts = ix++;
pkt.dts = pkt.pts;

To this:

pkt.pts = av_rescale_q(pkt.pts, ifcx->streams[0]->codec->time_base, ofcx->streams[0]->time_base);
pkt.dts = av_rescale_q(pkt.dts, ifcx->streams[0]->codec->time_base, ofcx->streams[0]->time_base);

What that does is convert the packet's PTS/DTS from the units used in the input stream's codec to the units of the output stream.

Also, some streams have multiple ticks-per-frame, so if the video runs at double speed you might need to this right below the above line:

pkt.pts *= ifcx->streams[0]->codec->ticks_per_frame;
pkt.dts *= ifcx->streams[0]->codec->ticks_per_frame;

Answer 3

In my experience with a modern H.264 encoder, I'm finding that the duration returned by ffmpeg is only a "suggestion" and that there is some "jitter" in the PTS. The only accurate way to determine frame rate or duration is to measure it yourself using the PTS values.

For an H.264 encoder running at 30fps, duration is always reported as 3000/90000, while measured duration is usually +/- 1 but periodically jumps say 3000+25 one frame and 3000-25 the next. I'm smoothing this out for recording by noticing any adjacent frames with opposite deviation and adjusting the PTS of the 2nd frame while preserving the total duration.

This give me a stream with an occasional (calculated) duration of 30001 or 2999, reflecting clock drift.

When recording a 29.97fps stream, av_read_frame() always returns a duration of 3000, while the nominal calculated duration is 3003 (correct for 29.97) with the same jitter and drift as described above.

In my case, I just built a state machine to clean up the timing. Hoping this helps someone.

Answer 4

Recently was doing the same. I had FPS twice lower than the camera sent. The reason was in AVstream->codec->ticks_per_frame field, set to 2. My source was progressive, and in case yours in interleaved - then that might be a reason of yet another factor of 2, giving 4x different FPS. 90000 Hz is the default timebase for video stream sent via RTSP. Timebase is different from FPS in resolution. For instance, a frame with timestamp 30000 will be shown at 1/3 sec if the timebase is 90000 Hz. The timebase should be put into the AVstream structure during the output, but AVFormatContext should have a real FPS value.

Record RTSP stream with FFmpeg libavformat

Question

4 answers

solution1
6 2012-11-19 16:13:53

solution2
3 2013-12-19 10:34:27

solution3
1 2017-01-29 22:13:59

solution4
0 2012-02-13 03:24:20

Record RTSP stream with FFmpeg libavformat

Question

4 answers

solution1 6 2012-11-19 16:13:53

solution2 3 2013-12-19 10:34:27

solution3 1 2017-01-29 22:13:59

solution4 0 2012-02-13 03:24:20

solution1
6 2012-11-19 16:13:53

solution2
3 2013-12-19 10:34:27

solution3
1 2017-01-29 22:13:59

solution4
0 2012-02-13 03:24:20