使用 ffmpeg c 连接视频和音频时如何计算 pts 和 dts

Question

/* Bundles one media container with its per-stream codec state. */
typedef struct file
{
    AVFormatContext *container; /* format context; used below for both the input and the output */
    AVCodecContext **codec;     /* codec contexts, indexed by stream index */
    int *frames;                /* per-stream counter, bumped by stream_clip for every frame it encodes */
} file;


/*
 * Decodes every frame of `input` and re-encodes it into `output`.
 *
 * The timestamp of each frame is rebuilt from the running per-stream frame
 * counter kept in output->frames, so a clip appended after another one
 * continues counting instead of restarting at 0.
 *
 * Returns 0 once the input is exhausted, 1 on an allocation, decode or
 * encode failure. The packet and the frame are released on every path.
 */
int stream_clip(file *input, file *output)
{
    AVPacket *packet = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    int status = 0;

    if (!packet || !frame)
    {
        printf("Failed allocating packet or frame\n");
        av_frame_free(&frame);
        av_packet_free(&packet);
        return 1;
    }

    while (1)
    {
        int res = decode_frame(input, frame, packet);

        if (res == 1)
        {
            printf("Error decoding a frame\n");
            status = 1;
            break;
        }
        if (res == -1)
        {
            printf("\nfile \"%s\" ended\n", input->container->url);
            break;
        }
        if (res != 0)
            continue;

        /* decode_frame leaves the stream the frame belongs to in the packet. */
        int index = packet->stream_index;
        AVRational fps = output->codec[index]->framerate;
        AVRational time_base = output->container->streams[index]->time_base;

        /*
         * NOTE(review): with the audio override below disabled, `fps` is
         * whatever the encoder context reports; for a stream without a
         * frame rate this may be 0/1, which makes the division below
         * meaningless -- TODO confirm.
         */
        /*
        if (input->container->streams[packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            fps.num = 1,
            fps.den = input->container->streams[packet->stream_index]->codecpar->sample_rate;
        }
        */

        /* pts = (ticks per second / frames per second) * frame number */
        frame->pts = (int64_t)(av_q2d(av_div_q((AVRational){time_base.den, 1}, fps)) * output->frames[index]);

        frame->pkt_dts = frame->pts;
        /*
         * NOTE(review): the duration is set to the running pts rather than
         * to the length of a single frame; this looks unintended and may be
         * related to the A/V drift -- confirm before relying on it.
         */
        frame->pkt_duration = frame->pts;

        /* pts is an int64_t, so it needs a 64-bit conversion specifier. */
        printf("%i FRAME %i PTS %lld\n", index, output->frames[index], (long long)frame->pts);

        output->frames[index]++;

        if (encode_frame(output, frame, index) == 1)
        {
            printf("Failde encoding frame\n");
            status = 1;
            break;
        }
        av_frame_unref(frame);
    }

    av_frame_free(&frame);

    /* Kept from the original "flush decoder" step; only done after a clean end of input. */
    if (status == 0)
        decode_frame(input, NULL, packet);

    av_packet_free(&packet);

    return status;
}

https://github.com/leandromoreira/ffmpeg-libav-tutorial#chapter-1---syncing-audio-and-video https://github.com/leandromoreira/ffmpeg-libav-tutorial#chapter-1---syncing-audio-and-video

I have tried calculating the pts as timescale / fps * frame_number for video, and for audio I just let ffmpeg do it for me. The video and audio play fine, but they are desynchronised: the audio ends sooner than the video. 我尝试通过对视频执行 timescale / fps * frame_number 来计算 pts，对于音频，我只是让 ffmpeg 为我做这件事。视频和音频播放正常，但音频和视频不同步：音频比视频更早结束。

I also get this error [mp4 @ 0xe9c7780] Timestamps are unset in a packet for stream 1. This is deprecated and will stop working in the future.我也收到此错误 [mp4 @ 0xe9c7780] stream 1 的数据包中未设置时间戳。这已被弃用，将来将停止工作。 Fix your code to set the timestamps properly修复您的代码以正确设置时间戳

[mp4 @ 0xe9c7780] Encoder did not produce proper pts, making some up. [mp4 @ 0xe9c7780] 编码器没有产生正确的 pts，正在自行生成。

If I calculate the audio pts, neither vlc nor mpv can play the video correctly: mpv plays the audio correctly but the video incorrectly, and vlc plays the video correctly but with no audio. 如果我计算音频 pts，vlc 和 mpv 都不能正确播放视频：mpv 播放音频正确但视频不正确，vlc 播放视频正确但没有音频。

mpv outputs this error: "Audio/Video desynchronisation detected, Possible reasons include too slow hardware, temporary CPU spikes, broken drivers. and broken files. Audio position will not match to the video (see AV status field)." mpv 输出此错误：“检测到音频/视频不同步，可能的原因包括硬件太慢、临时 CPU 峰值、损坏的驱动程序和损坏的文件。音频 position 与视频不匹配（请参阅 AV 状态字段）。”

Below is the pts calculated for each frame, 0 is audio, 1 is video下面是每帧计算的pts，0是音频，1是视频

frame_type FRAME frame_number PTS
0 FRAME 0 PTS 0
0 FRAME 1 PTS 512
0 FRAME 2 PTS 1024
0 FRAME 3 PTS 1536
1 FRAME 0 PTS 0
0 FRAME 4 PTS 2048
1 FRAME 1 PTS 0
0 FRAME 5 PTS 2560
1 FRAME 2 PTS 0
1 FRAME 3 PTS 0
0 FRAME 6 PTS 3072
1 FRAME 4 PTS 0
1 FRAME 5 PTS 0
0 FRAME 7 PTS 3584
1 FRAME 6 PTS 0
1 FRAME 7 PTS 0
0 FRAME 8 PTS 4096
1 FRAME 8 PTS 0
1 FRAME 9 PTS 0
0 FRAME 9 PTS 4608
1 FRAME 10 PTS 0
0 FRAME 10 PTS 5120
1 FRAME 11 PTS 0
1 FRAME 12 PTS 0
0 FRAME 11 PTS 5632
1 FRAME 13 PTS 0
1 FRAME 14 PTS 0
0 FRAME 12 PTS 6144
1 FRAME 15 PTS 0
1 FRAME 16 PTS 0
0 FRAME 13 PTS 6656
1 FRAME 17 PTS 0
1 FRAME 18 PTS 0
0 FRAME 14 PTS 7168
1 FRAME 19 PTS 0
0 FRAME 15 PTS 7680
1 FRAME 20 PTS 0
1 FRAME 21 PTS 0
0 FRAME 16 PTS 8192
1 FRAME 22 PTS 0
1 FRAME 23 PTS 0
0 FRAME 17 PTS 8704
1 FRAME 24 PTS 0
1 FRAME 25 PTS 0
0 FRAME 18 PTS 9216
1 FRAME 26 PTS 0
1 FRAME 27 PTS 0
0 FRAME 19 PTS 9728
1 FRAME 28 PTS 0
0 FRAME 20 PTS 10240
1 FRAME 29 PTS 0
1 FRAME 30 PTS 0
0 FRAME 21 PTS 10752
1 FRAME 31 PTS 0
1 FRAME 32 PTS 0
0 FRAME 22 PTS 11264
1 FRAME 33 PTS 0
1 FRAME 34 PTS 0
0 FRAME 23 PTS 11776
1 FRAME 35 PTS 0
1 FRAME 36 PTS 0
0 FRAME 24 PTS 12288
1 FRAME 37 PTS 0
0 FRAME 25 PTS 12800
1 FRAME 38 PTS 0
1 FRAME 39 PTS 0
0 FRAME 26 PTS 13312
1 FRAME 40 PTS 0
1 FRAME 41 PTS 0
0 FRAME 27 PTS 13824
1 FRAME 42 PTS 0
1 FRAME 43 PTS 0

file "in_short.mp4" ended
0 FRAME 28 PTS 14336
0 FRAME 29 PTS 14848
0 FRAME 30 PTS 15360
0 FRAME 31 PTS 15872
1 FRAME 44 PTS 0
0 FRAME 32 PTS 16384
1 FRAME 45 PTS 0
0 FRAME 33 PTS 16896
1 FRAME 46 PTS 0
1 FRAME 47 PTS 0
0 FRAME 34 PTS 17408
1 FRAME 48 PTS 0
1 FRAME 49 PTS 0
0 FRAME 35 PTS 17920
1 FRAME 50 PTS 0
1 FRAME 51 PTS 0
0 FRAME 36 PTS 18432
1 FRAME 52 PTS 0
1 FRAME 53 PTS 0
0 FRAME 37 PTS 18944
1 FRAME 54 PTS 0
0 FRAME 38 PTS 19456
1 FRAME 55 PTS 0
1 FRAME 56 PTS 0
0 FRAME 39 PTS 19968
1 FRAME 57 PTS 0
1 FRAME 58 PTS 0
0 FRAME 40 PTS 20480
1 FRAME 59 PTS 0
1 FRAME 60 PTS 0
0 FRAME 41 PTS 20992
1 FRAME 61 PTS 0
1 FRAME 62 PTS 0
0 FRAME 42 PTS 21504
1 FRAME 63 PTS 0
0 FRAME 43 PTS 22016
1 FRAME 64 PTS 0
1 FRAME 65 PTS 0
0 FRAME 44 PTS 22528
1 FRAME 66 PTS 0
1 FRAME 67 PTS 0
0 FRAME 45 PTS 23040
1 FRAME 68 PTS 0
1 FRAME 69 PTS 0
0 FRAME 46 PTS 23552
1 FRAME 70 PTS 0
1 FRAME 71 PTS 0
0 FRAME 47 PTS 24064
1 FRAME 72 PTS 0
0 FRAME 48 PTS 24576
1 FRAME 73 PTS 0
1 FRAME 74 PTS 0
0 FRAME 49 PTS 25088
1 FRAME 75 PTS 0
1 FRAME 76 PTS 0
0 FRAME 50 PTS 25600
1 FRAME 77 PTS 0
1 FRAME 78 PTS 0
0 FRAME 51 PTS 26112
1 FRAME 79 PTS 0
1 FRAME 80 PTS 0
0 FRAME 52 PTS 26624
1 FRAME 81 PTS 0
0 FRAME 53 PTS 27136
1 FRAME 82 PTS 0
1 FRAME 83 PTS 0
0 FRAME 54 PTS 27648
1 FRAME 84 PTS 0
1 FRAME 85 PTS 0
0 FRAME 55 PTS 28160
1 FRAME 86 PTS 0
1 FRAME 87 PTS 0

file "in_short.mp4" ended
0 FRAME 56 PTS 28672
0 FRAME 57 PTS 29184
0 FRAME 58 PTS 29696
0 FRAME 59 PTS 30208
1 FRAME 88 PTS 0
0 FRAME 60 PTS 30720
1 FRAME 89 PTS 0
0 FRAME 61 PTS 31232
1 FRAME 90 PTS 0
1 FRAME 91 PTS 0
0 FRAME 62 PTS 31744
1 FRAME 92 PTS 0
1 FRAME 93 PTS 0
0 FRAME 63 PTS 32256
1 FRAME 94 PTS 0
1 FRAME 95 PTS 0
0 FRAME 64 PTS 32768
1 FRAME 96 PTS 0
1 FRAME 97 PTS 0
0 FRAME 65 PTS 33280
1 FRAME 98 PTS 0
0 FRAME 66 PTS 33792
1 FRAME 99 PTS 0
1 FRAME 100 PTS 0
0 FRAME 67 PTS 34304
1 FRAME 101 PTS 0
1 FRAME 102 PTS 0
0 FRAME 68 PTS 34816
1 FRAME 103 PTS 0
1 FRAME 104 PTS 0
0 FRAME 69 PTS 35328
1 FRAME 105 PTS 0
1 FRAME 106 PTS 0
0 FRAME 70 PTS 35840
1 FRAME 107 PTS 0
0 FRAME 71 PTS 36352
1 FRAME 108 PTS 0
1 FRAME 109 PTS 0
0 FRAME 72 PTS 36864
1 FRAME 110 PTS 0
1 FRAME 111 PTS 0
0 FRAME 73 PTS 37376
1 FRAME 112 PTS 0
1 FRAME 113 PTS 0
0 FRAME 74 PTS 37888
1 FRAME 114 PTS 0
1 FRAME 115 PTS 0
0 FRAME 75 PTS 38400
1 FRAME 116 PTS 0
0 FRAME 76 PTS 38912
1 FRAME 117 PTS 0
1 FRAME 118 PTS 0
0 FRAME 77 PTS 39424
1 FRAME 119 PTS 0
1 FRAME 120 PTS 0
0 FRAME 78 PTS 39936
1 FRAME 121 PTS 0
1 FRAME 122 PTS 0
0 FRAME 79 PTS 40448
1 FRAME 123 PTS 0
1 FRAME 124 PTS 0
0 FRAME 80 PTS 40960
1 FRAME 125 PTS 0
0 FRAME 81 PTS 41472
1 FRAME 126 PTS 0
1 FRAME 127 PTS 0
0 FRAME 82 PTS 41984
1 FRAME 128 PTS 0
1 FRAME 129 PTS 0
0 FRAME 83 PTS 42496
1 FRAME 130 PTS 0
1 FRAME 131 PTS 0

file "in_short.mp4" ended

below when audio pts is calculated下面是计算音频pts时

0 FRAME 0 PTS 0
0 FRAME 1 PTS 512
0 FRAME 2 PTS 1024
0 FRAME 3 PTS 1536
1 FRAME 0 PTS 0
0 FRAME 4 PTS 2048
1 FRAME 1 PTS 1
0 FRAME 5 PTS 2560
1 FRAME 2 PTS 2
1 FRAME 3 PTS 3
0 FRAME 6 PTS 3072
1 FRAME 4 PTS 4
1 FRAME 5 PTS 5
0 FRAME 7 PTS 3584
1 FRAME 6 PTS 6
1 FRAME 7 PTS 7
0 FRAME 8 PTS 4096
1 FRAME 8 PTS 8
1 FRAME 9 PTS 9
0 FRAME 9 PTS 4608
1 FRAME 10 PTS 10
0 FRAME 10 PTS 5120
1 FRAME 11 PTS 11
1 FRAME 12 PTS 12
0 FRAME 11 PTS 5632
1 FRAME 13 PTS 13
1 FRAME 14 PTS 14
0 FRAME 12 PTS 6144
1 FRAME 15 PTS 15
1 FRAME 16 PTS 16
0 FRAME 13 PTS 6656
1 FRAME 17 PTS 17
1 FRAME 18 PTS 18
0 FRAME 14 PTS 7168
1 FRAME 19 PTS 19
0 FRAME 15 PTS 7680
1 FRAME 20 PTS 20
1 FRAME 21 PTS 21
0 FRAME 16 PTS 8192
1 FRAME 22 PTS 22
1 FRAME 23 PTS 23
0 FRAME 17 PTS 8704
1 FRAME 24 PTS 24
1 FRAME 25 PTS 25
0 FRAME 18 PTS 9216
1 FRAME 26 PTS 26
1 FRAME 27 PTS 27
0 FRAME 19 PTS 9728
1 FRAME 28 PTS 28
0 FRAME 20 PTS 10240
1 FRAME 29 PTS 29
1 FRAME 30 PTS 30
0 FRAME 21 PTS 10752
1 FRAME 31 PTS 31
1 FRAME 32 PTS 32
0 FRAME 22 PTS 11264
1 FRAME 33 PTS 33
1 FRAME 34 PTS 34
0 FRAME 23 PTS 11776
1 FRAME 35 PTS 35
1 FRAME 36 PTS 36
0 FRAME 24 PTS 12288
1 FRAME 37 PTS 37
0 FRAME 25 PTS 12800
1 FRAME 38 PTS 38
1 FRAME 39 PTS 39
0 FRAME 26 PTS 13312
1 FRAME 40 PTS 40
1 FRAME 41 PTS 41
0 FRAME 27 PTS 13824
1 FRAME 42 PTS 42
1 FRAME 43 PTS 43

file "in_short.mp4" ended
0 FRAME 28 PTS 14336
0 FRAME 29 PTS 14848
0 FRAME 30 PTS 15360
0 FRAME 31 PTS 15872
1 FRAME 44 PTS 44
0 FRAME 32 PTS 16384
1 FRAME 45 PTS 45
0 FRAME 33 PTS 16896
1 FRAME 46 PTS 46
1 FRAME 47 PTS 47
0 FRAME 34 PTS 17408
1 FRAME 48 PTS 48
1 FRAME 49 PTS 49
0 FRAME 35 PTS 17920
1 FRAME 50 PTS 50
1 FRAME 51 PTS 51
0 FRAME 36 PTS 18432
1 FRAME 52 PTS 52
1 FRAME 53 PTS 53
0 FRAME 37 PTS 18944
1 FRAME 54 PTS 54
0 FRAME 38 PTS 19456
1 FRAME 55 PTS 55
1 FRAME 56 PTS 56
0 FRAME 39 PTS 19968
1 FRAME 57 PTS 57
1 FRAME 58 PTS 58
0 FRAME 40 PTS 20480
1 FRAME 59 PTS 59
1 FRAME 60 PTS 60
0 FRAME 41 PTS 20992
1 FRAME 61 PTS 61
1 FRAME 62 PTS 62
0 FRAME 42 PTS 21504
1 FRAME 63 PTS 63
0 FRAME 43 PTS 22016
1 FRAME 64 PTS 64
1 FRAME 65 PTS 65
0 FRAME 44 PTS 22528
1 FRAME 66 PTS 66
1 FRAME 67 PTS 67
0 FRAME 45 PTS 23040
1 FRAME 68 PTS 68
1 FRAME 69 PTS 69
0 FRAME 46 PTS 23552
1 FRAME 70 PTS 70
1 FRAME 71 PTS 71
0 FRAME 47 PTS 24064
1 FRAME 72 PTS 72
0 FRAME 48 PTS 24576
1 FRAME 73 PTS 73
1 FRAME 74 PTS 74
0 FRAME 49 PTS 25088
1 FRAME 75 PTS 75
1 FRAME 76 PTS 76
0 FRAME 50 PTS 25600
1 FRAME 77 PTS 77
1 FRAME 78 PTS 78
0 FRAME 51 PTS 26112
1 FRAME 79 PTS 79
1 FRAME 80 PTS 80
0 FRAME 52 PTS 26624
1 FRAME 81 PTS 81
0 FRAME 53 PTS 27136
1 FRAME 82 PTS 82
1 FRAME 83 PTS 83
0 FRAME 54 PTS 27648
1 FRAME 84 PTS 84
1 FRAME 85 PTS 85
0 FRAME 55 PTS 28160
1 FRAME 86 PTS 86
1 FRAME 87 PTS 87

file "in_short.mp4" ended
0 FRAME 56 PTS 28672
0 FRAME 57 PTS 29184
0 FRAME 58 PTS 29696
0 FRAME 59 PTS 30208
1 FRAME 88 PTS 88
0 FRAME 60 PTS 30720
1 FRAME 89 PTS 89
0 FRAME 61 PTS 31232
1 FRAME 90 PTS 90
1 FRAME 91 PTS 91
0 FRAME 62 PTS 31744
1 FRAME 92 PTS 92
1 FRAME 93 PTS 93
0 FRAME 63 PTS 32256
1 FRAME 94 PTS 94
1 FRAME 95 PTS 95
0 FRAME 64 PTS 32768
1 FRAME 96 PTS 96
1 FRAME 97 PTS 97
0 FRAME 65 PTS 33280
1 FRAME 98 PTS 98
0 FRAME 66 PTS 33792
1 FRAME 99 PTS 99
1 FRAME 100 PTS 100
0 FRAME 67 PTS 34304
1 FRAME 101 PTS 101
1 FRAME 102 PTS 102
0 FRAME 68 PTS 34816
1 FRAME 103 PTS 103
1 FRAME 104 PTS 104
0 FRAME 69 PTS 35328
1 FRAME 105 PTS 105
1 FRAME 106 PTS 106
0 FRAME 70 PTS 35840
1 FRAME 107 PTS 107
0 FRAME 71 PTS 36352
1 FRAME 108 PTS 108
1 FRAME 109 PTS 109
0 FRAME 72 PTS 36864
1 FRAME 110 PTS 110
1 FRAME 111 PTS 111
0 FRAME 73 PTS 37376
1 FRAME 112 PTS 112
1 FRAME 113 PTS 113
0 FRAME 74 PTS 37888
1 FRAME 114 PTS 114
1 FRAME 115 PTS 115
0 FRAME 75 PTS 38400
1 FRAME 116 PTS 116
0 FRAME 76 PTS 38912
1 FRAME 117 PTS 117
1 FRAME 118 PTS 118
0 FRAME 77 PTS 39424
1 FRAME 119 PTS 119
1 FRAME 120 PTS 120
0 FRAME 78 PTS 39936
1 FRAME 121 PTS 121
1 FRAME 122 PTS 122
0 FRAME 79 PTS 40448
1 FRAME 123 PTS 123
1 FRAME 124 PTS 124
0 FRAME 80 PTS 40960
1 FRAME 125 PTS 125
0 FRAME 81 PTS 41472
1 FRAME 126 PTS 126
1 FRAME 127 PTS 127
0 FRAME 82 PTS 41984
1 FRAME 128 PTS 128
1 FRAME 129 PTS 129
0 FRAME 83 PTS 42496
1 FRAME 130 PTS 130
1 FRAME 131 PTS 131

file "in_short.mp4" ended

I have tried rescaling pts packets like in this example https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/transcoding.c line 448 with av_packet_rescale_ts() but since I am using multiple videos when a new video starts the new video pts starts with 0.我已经尝试像在这个例子https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/transcoding.c line 448 和 av_packet_rescale_ts() 中那样重新缩放 pts 数据包，但是因为我使用了多个视频开始新视频 pts 从 0 开始。

Video outputs This one is the one that mpv outputs this error Audio/Video desynchronisation detected, Possible reasons include too slow hardware, temporary CPU spikes, broken drivers.视频输出这是 mpv 输出此错误的那个检测到音频/视频不同步，可能的原因包括硬件太慢、临时 CPU 峰值、驱动程序损坏。 and broken files.和损坏的文件。 Audio position will not match to the video (see AV status field).音频 position 将与视频不匹配（请参阅 AV 状态字段）。 https://drive.google.com/file/d/1DlIOxJGiqUHumvuOQBISPNtHDnfuaKDE/view?usp=sharing https://drive.google.com/file/d/1DlIOxJGiqUHumvuOQBISPNtHDnfuaKDE/view?usp=sharing

https://drive.google.com/file/d/15fnrZT6XZw_CkOy51PsTbKG_F2ykM09M/view?usp=sharing This one plays fine, but the audio and video are desynchronized. It is not too noticeable because I appended only 3 videos; when I appended the same video 100 times, the audio ended several minutes before the video. https://drive.google.com/file/d/15fnrZT6XZw_CkOy51PsTbKG_F2ykM09M/view?usp=sharing 这个播放正常，但音视频不同步。因为我只附加了 3 个视频，所以不太明显；当我把同一个视频附加 100 次时，音频比视频提前几分钟结束。

The full code from the "video editor" I am making: https://github.com/LentilStew/video_transcoder我正在制作的“视频编辑器”的完整代码： https://github.com/LentilStew/video_transcoder

I am very bad at programming, so my code isn't great.我很不擅长编程，所以我的代码不是很好。 The snippets given in this post are from this small transcoder I made这篇文章中给出的片段来自我制作的这个小型转码器

I compile with this command (the pkg-config call is wrapped in backticks): 我用这条命令编译（pkg-config 调用放在反引号中）：
gcc main.c -o compiled.out `pkg-config --libs libavformat libavfilter libavutil libavcodec libswscale libavdevice libavutil`

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/avutil.h>

/* Bundles one media container with its per-stream codec state. */
typedef struct file
{
    AVFormatContext *container; /* format context; opened for reading by open_media, for writing by create_output */
    AVCodecContext **codec;     /* codec contexts, indexed by stream index; entries may be NULL */
    int *frames;                /* per-stream counter, bumped by stream_clip for every frame it encodes */
} file;

/* Wrapper around a pointer to an output `file`; not referenced by the code visible here. */
typedef struct EncoderContext
{
    file *encoder;

} EncoderContext;

/*
 * Forward declarations, so that main() can appear before the helpers it
 * calls. save_gray_frame is declared here but has no definition in the
 * visible part of this file.
 */
file *create_output(int streams, const char *filename);
file *start_output_from_file(const char *path, file *input, const char *video_encoder, const char *audio_encoder);
int create_video_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder, int width, int height,
                         int pix_fmt, AVRational sample_aspect_ratio, AVRational frame_rate, int bit_rate, int buffer_size);
int create_audio_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder,
                         int channels, int sample_rate, int bit_rate);
int decode_frame(file *decoder, AVFrame *frame, AVPacket *packet);
int open_media(file *video, const char input_path[], const char *video_codec, const char *audio_codec);
void save_gray_frame(unsigned char *buf, int width, int height);
int free_file(file *f);
int encode_frame(file *encoder, AVFrame *input_frame, int index);
int stream_clip(file *input, file *output);

int main()
{
    int res;
    int inputs_len = 2;

    file *input1 = malloc(sizeof(file));

    res = open_media(input1, "in_short.mp4", "h264_cuvid", NULL);

    file *output = start_output_from_file("output.mp4", input1, "h264_nvenc", NULL);

    if (res != 0 || !input1)
    {
        printf("Failed opening input 1");
        return 1;
    }

    stream_clip(input1, output);
    free_file(input1);

    for (int i = 0; i < inputs_len; i++)
    {
        file *input = malloc(sizeof(file));
        res = open_media(input, "in_short.mp4", "h264_cuvid", NULL);
        stream_clip(input, output);
        free_file(input);
    }
    encode_frame(output, NULL, 0);
    encode_frame(output, NULL, 1);

    av_write_trailer(output->container);

    free_file(output);
}

/*
 * Decodes every frame of `input` and re-encodes it into `output`.
 *
 * The timestamp of each frame is rebuilt from the running per-stream frame
 * counter kept in output->frames, so a clip appended after another one
 * continues counting instead of restarting at 0.
 *
 * Returns 0 once the input is exhausted, 1 on an allocation, decode or
 * encode failure. The packet and the frame are released on every path.
 */
int stream_clip(file *input, file *output)
{
    AVPacket *packet = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    int status = 0;

    if (!packet || !frame)
    {
        printf("Failed allocating packet or frame\n");
        av_frame_free(&frame);
        av_packet_free(&packet);
        return 1;
    }

    while (1)
    {
        int res = decode_frame(input, frame, packet);

        if (res == 1)
        {
            printf("Error decoding a frame\n");
            status = 1;
            break;
        }
        if (res == -1)
        {
            printf("\nfile \"%s\" ended\n", input->container->url);
            break;
        }
        if (res != 0)
            continue;

        /* decode_frame leaves the stream the frame belongs to in the packet. */
        int index = packet->stream_index;
        AVRational fps = output->codec[index]->framerate;
        AVRational time_base = output->container->streams[index]->time_base;

        /* Audio: use the sample rate as the "frame rate". */
        if (input->container->streams[index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            fps.den = 1;
            fps.num = input->container->streams[index]->codecpar->sample_rate;
        }

        /* pts = (ticks per second / frames per second) * frame number */
        frame->pts = (int64_t)(av_q2d(av_div_q((AVRational){time_base.den, 1}, fps)) * output->frames[index]);

        frame->pkt_dts = frame->pts;
        /*
         * NOTE(review): the duration is set to the running pts rather than
         * to the length of a single frame; this looks unintended and may be
         * related to the A/V drift -- confirm before relying on it.
         */
        frame->pkt_duration = frame->pts;

        /* pts is an int64_t, so it needs a 64-bit conversion specifier. */
        printf("%i FRAME %i PTS %lld\n", index, output->frames[index], (long long)frame->pts);

        output->frames[index]++;

        if (encode_frame(output, frame, index) == 1)
        {
            printf("Failde encoding frame\n");
            status = 1;
            break;
        }
        av_frame_unref(frame);
    }

    av_frame_free(&frame);

    /* Kept from the original flow; only done after a clean end of input. */
    if (status == 0)
        decode_frame(input, NULL, packet);

    av_packet_free(&packet);

    return status;
}

/*
 * Sends one frame to the encoder of stream `index` and writes every packet
 * the encoder hands back to the output container.
 *
 * encoder     - output file (muxer plus per-stream encoders)
 * input_frame - frame to encode, or NULL to flush the encoder
 * index       - stream index; also stamped on each written packet
 *
 * Returns 0 on success (including when the stream has no encoder), 1 on
 * error. The temporary packet is released on every path.
 */
int encode_frame(file *encoder, AVFrame *input_frame, int index)
{
    AVCodecContext *codec = encoder->codec[index];

    /* Nothing to do for streams without an encoder; checked before allocating. */
    if (!codec)
        return 0;

    AVPacket *output_packet = av_packet_alloc();
    if (!output_packet)
    {
        printf("ENCODER: Failed mallocing output_package");
        return 1;
    }

    int status = 0;
    int response = avcodec_send_frame(codec, input_frame);

    /* A repeated flush request reports AVERROR_EOF, which is harmless. */
    if (response < 0 && !(response == AVERROR_EOF && !input_frame))
    {
        printf("ENCODER: Error sending frame");
        status = 1;
    }

    while (response >= 0)
    {
        response = avcodec_receive_packet(codec, output_packet);

        /* EAGAIN: encoder wants more input. EOF: encoder fully drained. */
        if (response == AVERROR(EAGAIN) || response == AVERROR_EOF)
            break;

        if (response < 0)
        {
            printf("ENCODER: Error receiving packet");
            status = 1;
            break;
        }

        output_packet->stream_index = index;

        response = av_interleaved_write_frame(encoder->container, output_packet);
        if (response != 0)
        {
            printf("ENCODER:failed writing frame");
            status = 1;
            break;
        }
    }

    /* av_packet_free also unreferences any payload still attached. */
    av_packet_free(&output_packet);

    return status;
}

/*
 * Releases a `file`: every codec context, the codec array, the container
 * and the struct itself. Accepts NULL and partially initialised structs
 * (NULL container and/or NULL codec array).
 *
 * f->frames is not released here.
 *
 * Always returns 0.
 */
int free_file(file *f)
{
    if (!f)
        return 0;

    if (f->container && f->codec)
    {
        for (unsigned int i = 0; i < f->container->nb_streams; i++)
        {
            if (f->codec[i] == NULL)
                continue;
            avcodec_free_context(&f->codec[i]);
        }
    }

    av_free(f->codec);

    avformat_close_input(&f->container);

    free(f);

    return 0;
}

/*
 * Opens `input_path` for reading and opens one decoder per stream.
 *
 * video       - caller-allocated struct; `container` and `codec` are filled in
 * input_path  - path of the media file
 * video_codec - decoder name forced for video streams, NULL for the default
 * audio_codec - decoder name forced for audio streams, NULL for the default
 *
 * Returns 0 on success, 1 on failure. On failure, whatever was already
 * stored in `video` is left in place; `video->codec` is either NULL or a
 * zero-filled array in which only successfully opened decoders are set.
 */
int open_media(file *video, const char input_path[], const char *video_codec, const char *audio_codec)
{
    video->codec = NULL;
    video->container = avformat_alloc_context();

    if (!video->container)
    {
        printf("Failed to alloc memory to the container of the input file");
        return 1;
    }
    if (avformat_open_input(&video->container, input_path, NULL, NULL) != 0)
    {
        printf("Failed to open input file");
        return 1;
    }
    if (avformat_find_stream_info(video->container, NULL) < 0)
    {
        printf("Failed to open read stream info");
        return 1;
    }

    /* Allocated with av_calloc because free_file releases it with av_free. */
    video->codec = av_calloc(video->container->nb_streams, sizeof(AVCodecContext *));
    if (!video->codec)
    {
        printf("Failed to alloc memory for the codec array");
        return 1;
    }

    for (unsigned int i = 0; i < video->container->nb_streams; i++)
    {
        AVStream *stream = video->container->streams[i];
        const char *curr_codec = NULL;
        const AVCodec *dec;
        AVCodecContext *codec_ctx;

        if (AVMEDIA_TYPE_VIDEO == stream->codecpar->codec_type)
        {
            curr_codec = video_codec;
        }
        else if (AVMEDIA_TYPE_AUDIO == stream->codecpar->codec_type)
        {
            curr_codec = audio_codec;
        }

        /* Look up the name chosen for THIS stream type, not always the video one. */
        if (curr_codec == NULL)
            dec = avcodec_find_decoder(stream->codecpar->codec_id);
        else
            dec = avcodec_find_decoder_by_name(curr_codec);

        if (!dec)
        {
            printf("failed to find the codec");
            return 1;
        }

        codec_ctx = avcodec_alloc_context3(dec);
        if (!codec_ctx)
        {
            printf("failed to alloc memory for codec context");
            return 1;
        }

        if (avcodec_parameters_to_context(codec_ctx, stream->codecpar) < 0)
        {
            printf("failed to fill codec context");
            avcodec_free_context(&codec_ctx);
            return 1;
        }

        if (avcodec_open2(codec_ctx, dec, NULL) < 0)
        {
            printf("failed to open codec");
            avcodec_free_context(&codec_ctx);
            return 1;
        }

        video->codec[i] = codec_ctx;
    }
    return 0;
}

/*
 * Reads packets from the container until one of them produces a decoded
 * frame. Once the container has no more packets, the decoders are drained
 * one stream at a time so that frames they still hold are not lost.
 *
 * On success packet->stream_index identifies the stream `frame` belongs to.
 * When `frame` is NULL no draining is attempted.
 *
 * returns:
 *  1 if error
 *  0 if success
 * -1 if file ended
 */
int decode_frame(file *decoder, AVFrame *frame, AVPacket *packet)
{
    while (1)
    {
        av_packet_unref(packet);
        if (av_read_frame(decoder->container, packet) < 0)
            break;

        AVCodecContext *dec = decoder->codec[packet->stream_index];

        /* No decoder for this stream: ignore its packets. */
        if (!dec)
            continue;

        if (avcodec_send_packet(dec, packet) < 0)
        {
            printf("Error while sending packet to decoder");
            return 1;
        }

        int response = avcodec_receive_frame(dec, frame);
        if (response >= 0)
            return 0;

        if (response != AVERROR(EAGAIN) && response != AVERROR_EOF)
        {
            printf("Error while receiving frame from decoder");
            return 1;
        }
        /* The decoder needs more input: go read the next packet. */
    }

    if (!frame)
        return -1;

    for (unsigned int i = 0; i < decoder->container->nb_streams; i++)
    {
        AVCodecContext *dec = decoder->codec[i];

        if (!dec)
            continue;

        /*
         * A NULL packet switches the decoder to draining mode. Asking again
         * on a later call only yields AVERROR_EOF, so the result is ignored.
         */
        avcodec_send_packet(dec, NULL);

        int response = avcodec_receive_frame(dec, frame);
        if (response >= 0)
        {
            /* The packet is blank here; use it to report the stream. */
            packet->stream_index = (int)i;
            return 0;
        }
        if (response != AVERROR(EAGAIN) && response != AVERROR_EOF)
        {
            printf("Error while receiving frame from decoder");
            return 1;
        }
    }

    return -1;
}
/*
 * Adds an audio stream to `container` and opens the encoder named `encoder`
 * for it.
 *
 * cod_ctx     - receives the opened encoder context in cod_ctx[0]
 * container   - output format context the new stream is added to
 * encoder     - encoder name, looked up with avcodec_find_encoder_by_name
 * channels    - channel count; the default layout for that count is used
 * sample_rate - sample rate in Hz; the encoder time base becomes 1/sample_rate
 * bit_rate    - target bit rate
 *
 * Returns 0 on success, 1 on failure. On failure cod_ctx[0] is freed and
 * left NULL if it had already been allocated.
 */
int create_audio_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder,
                         int channels, int sample_rate, int bit_rate)
{
    AVStream *stream = avformat_new_stream(container, NULL);
    if (!stream)
    {
        printf("CREATE AUDIO ENCODER: Failed allocating memory for stream");
        return 1;
    }
    const AVCodec *enc = avcodec_find_encoder_by_name(encoder);
    if (!enc)
    {
        printf("CREATE AUDIO ENCODER: Failed searching encoder");

        return 1;
    }

    /* The first advertised sample format is used, so the list must exist. */
    if (!enc->sample_fmts)
    {
        printf("CREATE AUDIO ENCODER: encoder lists no sample formats");
        return 1;
    }

    cod_ctx[0] = avcodec_alloc_context3(enc);

    if (!cod_ctx[0])
    {
        printf("CREATE AUDIO ENCODER: Failed allocation codec context");
        return 1;
    }

    cod_ctx[0]->channels = channels;
    cod_ctx[0]->channel_layout = av_get_default_channel_layout(channels);
    cod_ctx[0]->sample_rate = sample_rate;
    cod_ctx[0]->sample_fmt = *enc->sample_fmts;
    cod_ctx[0]->bit_rate = bit_rate;
    cod_ctx[0]->time_base = (AVRational){1, sample_rate};

    /* Time base hint for the muxer, mirroring create_video_encoder. */
    stream->time_base = cod_ctx[0]->time_base;

    /*
     * Formats that store codec headers globally need this flag on the
     * encoder context, and it has to be set before the encoder is opened.
     */
    if (container->oformat && (container->oformat->flags & AVFMT_GLOBALHEADER))
        cod_ctx[0]->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (avcodec_open2(cod_ctx[0], enc, NULL) < 0)
    {
        printf("CREATE AUDIO ENCODER: couldn't open codec");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    if (avcodec_parameters_from_context(stream->codecpar, cod_ctx[0]) < 0)
    {
        printf("CREATE AUDIO ENCODER: failed setting codec parameters from context");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    return 0;
}

/*
 * Adds a video stream to `container` and opens the encoder named `encoder`
 * for it, with the "fast" preset.
 *
 * cod_ctx             - receives the opened encoder context in cod_ctx[0]
 * container           - output format context the new stream is added to
 * encoder             - encoder name, looked up with avcodec_find_encoder_by_name
 * width, height       - picture size
 * pix_fmt             - pixel format of the frames that will be sent
 * sample_aspect_ratio - pixel aspect ratio
 * frame_rate          - frame rate; the time base is set to its inverse
 * bit_rate            - target bit rate
 * buffer_size         - rate-control buffer size
 *
 * Returns 0 on success, 1 on failure. On failure cod_ctx[0] is freed and
 * left NULL if it had already been allocated.
 */
int create_video_encoder(AVCodecContext **cod_ctx, AVFormatContext *container, const char *encoder, int width, int height,
                         int pix_fmt, AVRational sample_aspect_ratio, AVRational frame_rate, int bit_rate, int buffer_size)
{
    AVStream *stream = avformat_new_stream(container, NULL);
    if (!stream)
    {
        printf("CREATE VIDEO ENCODER: Failed allocating memory for stream");
        return 1;
    }
    const AVCodec *enc = avcodec_find_encoder_by_name(encoder);
    if (!enc)
    {
        printf("CREATE VIDEO ENCODER: Failed searching encoder");

        return 1;
    }

    cod_ctx[0] = avcodec_alloc_context3(enc);

    if (!cod_ctx[0])
    {
        printf("CREATE VIDEO ENCODER: Failed allocation codec context");
        return 1;
    }

    cod_ctx[0]->height = height;
    cod_ctx[0]->width = width;
    cod_ctx[0]->pix_fmt = pix_fmt;

    cod_ctx[0]->sample_aspect_ratio = sample_aspect_ratio;
    cod_ctx[0]->time_base = av_inv_q(frame_rate);
    cod_ctx[0]->framerate = frame_rate;
    cod_ctx[0]->bit_rate = bit_rate;
    cod_ctx[0]->rc_buffer_size = buffer_size;
    /*
     * NOTE(review): the min/max rate limits are fed the buffer size, not a
     * bit rate -- this looks unintended; confirm before changing.
     */
    cod_ctx[0]->rc_max_rate = buffer_size;
    cod_ctx[0]->rc_min_rate = buffer_size;

    /* Time base hint for the muxer. */
    stream->time_base = cod_ctx[0]->time_base;

    /*
     * Formats that store codec headers globally need this flag on the
     * encoder context, and it has to be set before the encoder is opened.
     */
    if (container->oformat && (container->oformat->flags & AVFMT_GLOBALHEADER))
        cod_ctx[0]->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (av_opt_set(cod_ctx[0]->priv_data, "preset", "fast", 0) != 0)
    {
        printf("CREATE VIDEO ENCODER: Failed opt set");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    if (avcodec_open2(cod_ctx[0], enc, NULL) < 0)
    {
        printf("CREATE VIDEO ENCODER: couldn't open codec");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    if (avcodec_parameters_from_context(stream->codecpar, cod_ctx[0]) < 0)
    {
        printf("CREATE VIDEO ENCODER: failed setting codec parameters from context");
        avcodec_free_context(&cod_ctx[0]);
        return 1;
    }

    return 0;
}

/*
 * Creates the output file `path` with one encoder per audio/video stream of
 * `input`, mirroring the input's parameters, then opens the file and writes
 * the container header.
 *
 * path          - output file name (also selects the container format)
 * input         - already opened input whose streams are mirrored
 * video_encoder - encoder name for video, NULL to reuse the input codec's name
 * audio_encoder - encoder name for audio, NULL to reuse the input codec's name
 *
 * Streams that are neither audio nor video get no encoder.
 *
 * Returns the new output, or NULL on failure (the partially built output is
 * released first).
 */
file *start_output_from_file(const char *path, file *input, const char *video_encoder, const char *audio_encoder)
{
    file *output = create_output(input->container->nb_streams, path);
    if (!output)
    {
        return NULL;
    }

    output->frames = calloc(input->container->nb_streams, sizeof(int));
    if (!output->frames)
    {
        printf("Failed allocating the frame counters\n");
        goto fail;
    }

    for (int stream = 0; (unsigned int)stream < input->container->nb_streams; stream++)
    {
        AVCodecContext *codec_ctx = input->codec[stream];
        /* Defined for every stream type, including the ones skipped below. */
        int res = 0;

        if (!codec_ctx)
            continue;

        switch (codec_ctx->codec_type)
        {
        case AVMEDIA_TYPE_AUDIO:
            if (audio_encoder == NULL)
            {
                if (!codec_ctx->codec_descriptor)
                {
                    res = 1;
                    break;
                }
                audio_encoder = codec_ctx->codec_descriptor->name;
            }
            res = create_audio_encoder(&output->codec[stream], output->container, audio_encoder, codec_ctx->channels, codec_ctx->sample_rate, codec_ctx->bit_rate);
            break;

        case AVMEDIA_TYPE_VIDEO:
        {
            if (video_encoder == NULL)
            {
                if (!codec_ctx->codec_descriptor)
                {
                    res = 1;
                    break;
                }
                video_encoder = codec_ctx->codec_descriptor->name;
            }
            AVRational framerate = av_guess_frame_rate(input->container, input->container->streams[stream], NULL);
            res = create_video_encoder(&output->codec[stream], output->container, video_encoder, codec_ctx->width, codec_ctx->height,
                                       codec_ctx->sw_pix_fmt, (AVRational){1, 1}, framerate, codec_ctx->bit_rate, codec_ctx->rc_buffer_size);
            break;
        }

        default:
            /* No encoder is created for other stream types. */
            break;
        }

        if (res != 0)
        {
            printf("Failed opening encoder stream number %i \n", stream);
            goto fail;
        }
    }

    /*
     * AV_CODEC_FLAG_GLOBAL_HEADER is a codec flag: it belongs on each
     * AVCodecContext before avcodec_open2. OR-ing it into the format
     * context's flags, as was done here before, has no such meaning.
     */

    if (!(output->container->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&output->container->pb, path, AVIO_FLAG_WRITE) < 0)
        {
            printf("could not open the output file");
            goto fail;
        }
    }

    if (avformat_write_header(output->container, NULL) < 0)
    {
        printf("an error occurred when opening output file");
        goto fail;
    }

    return output;

fail:
    /* free_file does not release the frame counters, so do it here. */
    free(output->frames);
    output->frames = NULL;
    free_file(output);
    return NULL;
}

/*
 * Allocates an output `file`: a format context whose container type is
 * guessed from `filename`, plus a zero-filled array of `streams` encoder
 * slots. `frames` is left NULL for the caller to set up.
 *
 * Returns the new struct, or NULL on failure (nothing is leaked).
 */
file *create_output(int streams, const char *filename)
{
    /* calloc so that `frames` and every other pointer start out as NULL. */
    file *output = calloc(1, sizeof(*output));
    if (!output)
    {
        return NULL;
    }

    if (avformat_alloc_output_context2(&output->container, NULL, NULL, filename) < 0)
    {
        printf("Failed opening output\n");
        free(output);
        return NULL;
    }

    /* av_calloc zero-fills, so every slot already reads as NULL. */
    output->codec = av_calloc(streams, sizeof(AVCodecContext *));
    if (!output->codec)
    {
        printf("Failed allocating ram for codec\n");
        avformat_free_context(output->container);
        free(output);
        return NULL;
    }

    return output;
}

Answer 1

The main issue is setting packet duration to pts: frame->pkt_duration = frame->pts .主要问题是将数据包持续时间设置为 pts: frame->pkt_duration = frame->pts 。

The duration is normally the same for all frames, and the pts is incrementing.所有帧的持续时间通常相同，并且 pts 正在递增。

Other issues:其他问题：

Each audio packet has multiple audio frames.每个音频包有多个音频帧。
According to the following post , "you can have 1024 (or 960) frames per packet".根据以下帖子，“每个数据包可以有 1024（或 960）帧”。
We have to scale the packet duration and the pts by "frames per packet".我们必须通过“每个数据包的帧数”来缩放数据包持续时间和 pts。
av_packet_unref(output_packet) seems to be misplaced (I don't know if it matters). av_packet_unref(output_packet) 的位置似乎不对（我不知道这是否有影响）。
The resolution of the video time base seems too low, so I changed it to 1/60000 (I don't know if it matters). 视频时基的分辨率似乎太低，我把它改成了 1/60000（我不知道这是否有影响）。
The pts, dts and duration are not set for the packets (I don't know if we have to set them both per frame and per packet). 没有为数据包设置 pts、dts 和持续时间（我不知道是否必须同时按帧和按数据包设置它们）。
Just in case, I added pts, dts and duration for every packet.以防万一，我为每个数据包添加了 pts、dts 和持续时间。

I am still learning the C interface of Libav.我还在学习libav的C接口。
My suggested solution is probably not perfect...我建议的解决方案可能并不完美......

Here is the updated code for setting the pts, dts and duration:这是用于设置 pts、dts 和持续时间的更新代码：

/* Fragment from the body of stream_clip: rebuilds duration, pts and dts of the decoded frame. */
AVRational fps = output->codec[packet->stream_index]->framerate;
int frames_per_packet = 1; /* video: one frame per packet */
AVRational time_base = output->container->streams[packet->stream_index]->time_base;

if (input->container->streams[packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
    /* Audio: the "frame rate" is the sample rate, and one packet spans frame_size samples. */
    fps.den = 1;
    fps.num = input->container->streams[packet->stream_index]->codecpar->sample_rate;
    frames_per_packet = input->container->streams[packet->stream_index]->codecpar->frame_size;  // For audio there are 1024 (or 960) frames per packet: https://stackoverflow.com/questions/23216103/about-definition-for-terms-of-audio-codec
}

/*
 * Duration of one packet = (time_base.den / fps) * frames_per_packet.
 * NOTE(review): the quotient is truncated to an integer before the
 * multiplication, so a non-integer ticks-per-frame value would lose its
 * fraction on every frame -- confirm this is acceptable.
 */
frame->pkt_duration = (int64_t)(av_q2d(av_div_q((AVRational) { time_base.den, 1 }, fps))) * (int64_t)frames_per_packet;
/* pts advances by one packet duration per frame already sent on this stream. */
frame->pts = (int64_t)(frame->pkt_duration * output->frames[packet->stream_index]);
frame->pkt_dts = frame->pts;

Complete updated code:完整更新代码：

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/avutil.h>

/* Bundles one media container with its per-stream codec state and counters. */
typedef struct file
{
    AVFormatContext* container; // format context of the input or output
    AVCodecContext** codec;     // codec contexts, indexed by stream index
    int* frames;                // per-stream frame counter

    // Per-stream packet counter (same principle as counting frames).
    int* packets;
} file;

/* Wrapper around a pointer to an output `file`; not referenced by the code visible here. */
typedef struct EncoderContext
{
    file* encoder;

} EncoderContext;

/* Forward declarations, so that main() can appear before the helpers it calls. */
file* create_output(int streams, const char* filename);
file* start_output_from_file(const char* path, file* input, const char* video_encoder, const char* audio_encoder);
int create_video_encoder(AVCodecContext** cod_ctx, AVFormatContext* container, const char* encoder, int width, int height,
    int pix_fmt, AVRational sample_aspect_ratio, AVRational frame_rate, int bit_rate, int buffer_size);
int create_audio_encoder(AVCodecContext** cod_ctx, AVFormatContext* container, const char* encoder,
    int channels, int sample_rate, int bit_rate);
int decode_frame(file* decoder, AVFrame* frame, AVPacket* packet);
int open_media(file* video, const char input_path[], const char* video_codec, const char* audio_codec);
void save_gray_frame(unsigned char* buf, int width, int height);
int free_file(file* f);
int encode_frame(file* encoder, AVFrame* input_frame, int index);
int stream_clip(file* input, file* output);

int main()
{
    int res;
    int inputs_len = 2;

    //file* input1 = malloc(sizeof(file));
    file* input1 = calloc(sizeof(file), 1);

    res = open_media(input1, "in_short.mp4", "h264_cuvid", NULL);

    file* output = start_output_from_file("output.mp4", input1, "h264_nvenc", NULL);

    if (res != 0 || !input1)
    {
        printf("Failed opening input 1");
        return 1;
    }

    stream_clip(input1, output);
    free_file(input1);

    for (int i = 0; i < inputs_len; i++)
    {
        //file* input = malloc(sizeof(file));
        file* input = calloc(sizeof(file), 1);
        res = open_media(input, "in_short.mp4", "h264_cuvid", NULL);
        stream_clip(input, output);
        free_file(input);
    }

    encode_frame(output, NULL, 0);
    encode_frame(output, NULL, 1);    

    av_write_trailer(output->container);

    free_file(output);
}

/*
 * Decodes every frame of `input` and feeds it to the matching encoder of
 * `output`, stamping each frame with a PTS derived from the per-stream frame
 * counter in output->frames so that successive clips continue seamlessly.
 *
 * Returns 0 when the input ended normally, 1 on a decode/encode/allocation
 * failure. The packet and frame are released on every path.
 */
int stream_clip(file* input, file* output)
{
    AVPacket* packet = av_packet_alloc();
    AVFrame* frame = av_frame_alloc();
    int status = 0;

    if (!packet || !frame)
    {
        printf("Failed allocating packet or frame\n");
        av_frame_free(&frame);
        av_packet_free(&packet);
        return 1;
    }

    while (1)
    {
        int res = decode_frame(input, frame, packet);

        if (res == -1)
        {
            printf("\nfile \"%s\" ended\n", input->container->url);
            break;
        }
        if (res != 0)
        {
            printf("Error decoding a frame\n");
            status = 1;
            break;
        }

        const int idx = packet->stream_index;
        const AVCodecParameters* in_par = input->container->streams[idx]->codecpar;
        const AVRational time_base = output->container->streams[idx]->time_base;

        /* "tick" is the duration of one video frame or one audio sample. */
        AVRational tick;
        int64_t ticks_per_frame = 1;

        if (in_par->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            /* Every audio frame is assumed to hold frame_size samples
             * (1024 or 960 for AAC). NOTE(review): a codec reporting
             * frame_size == 0 would yield zero durations. */
            tick = (AVRational){ 1, in_par->sample_rate };
            ticks_per_frame = in_par->frame_size;
        }
        else
        {
            tick = av_inv_q(output->codec[idx]->framerate);
        }

        /* Exact rational rescaling: truncating time_base.den / fps to an
         * integer first would drop the fractional part and ignore
         * time_base.num, and multiplying that by the counter would let the
         * error grow with every frame. */
        frame->pkt_duration = av_rescale_q(ticks_per_frame, tick, time_base);
        frame->pts = av_rescale_q((int64_t)output->frames[idx] * ticks_per_frame, tick, time_base);
        frame->pkt_dts = frame->pts;

        /* PTS is 64-bit; print it as such instead of truncating to int. */
        printf("%i FRAME %i PTS %lld\n", idx, output->frames[idx], (long long)frame->pts);

        output->frames[idx]++;

        if (encode_frame(output, frame, idx) == 1)
        {
            printf("Failde encoding frame\n");
            status = 1;
            break;
        }
        av_frame_unref(frame);
    }

    /* No extra decode_frame(input, NULL, packet) call here: after end of file
     * it only repeats the failed read, and after a read error it could pass
     * a NULL frame to the decoder. */
    av_frame_free(&frame);
    av_packet_free(&packet);

    return status;
}

/*
 * Sends one frame (or NULL to flush) to the encoder of stream `index` and
 * writes every packet the encoder hands back to the output container.
 *
 * Packet timestamps are regenerated from the per-stream packet counter in
 * encoder->packets, replacing whatever the encoder produced. DTS is forced
 * equal to PTS, which is only valid while the encoder emits packets in
 * presentation order (no B-frame reordering).
 *
 * Returns 0 on success (also when the stream has no encoder, or when
 * avcodec_send_frame() rejects the frame, as before), 1 on allocation,
 * receive or write failure. The temporary packet is freed on every path.
 */
int encode_frame(file* encoder, AVFrame* input_frame, int index)
{
    AVCodecContext* codec = encoder->codec[index];

    /* Check for a missing encoder before allocating, so nothing leaks. */
    if (!codec)
        return 0;

    AVPacket* output_packet = av_packet_alloc();
    if (!output_packet)
    {
        printf("ENCODER: Failed mallocing output_package");
        return 1;
    }

    const AVStream* stream = encoder->container->streams[index];

    /* "tick" is the duration of one video frame or one audio sample. */
    AVRational tick = av_inv_q(codec->framerate);
    int64_t ticks_per_packet = 1;

    if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        /* One audio packet is assumed to hold frame_size samples (1024 or
         * 960 for AAC). NOTE(review): frame_size == 0 gives zero durations. */
        tick = (AVRational){ 1, stream->codecpar->sample_rate };
        ticks_per_packet = stream->codecpar->frame_size;
    }

    int status = 0;
    int response = avcodec_send_frame(codec, input_frame);

    while (response >= 0)
    {
        /* avcodec_receive_packet() unrefs the packet itself before filling
         * it, and av_interleaved_write_frame() takes ownership of the data,
         * so no explicit unref is needed between iterations. */
        response = avcodec_receive_packet(codec, output_packet);

        if (response == AVERROR(EAGAIN) || response == AVERROR_EOF)
        {
            break;
        }
        else if (response < 0)
        {
            printf("ENCODER: Error receiving packet");
            status = 1;
            break;
        }

        output_packet->stream_index = index;

        /* Exact rational rescaling into the stream time base; an integer
         * truncation of time_base.den / fps would accumulate drift. */
        output_packet->duration = av_rescale_q(ticks_per_packet, tick, stream->time_base);
        output_packet->pts = av_rescale_q((int64_t)encoder->packets[index] * ticks_per_packet, tick, stream->time_base);
        output_packet->dts = output_packet->pts;

        encoder->packets[index]++;

        response = av_interleaved_write_frame(encoder->container, output_packet);

        if (response != 0)
        {
            printf("ENCODER:failed writing frame");
            status = 1;
            break;
        }
    }

    av_packet_free(&output_packet);

    return status;
}

/*
 * Releases everything owned by `f`: the codec contexts, the codec array,
 * the container, both counters and the struct itself. Accepts NULL and
 * partially initialised files. Always returns 0.
 *
 * Input and output files are torn down differently:
 *  - open_media() allocates the codec array with calloc() and the container
 *    is a demuxer, closed with avformat_close_input();
 *  - create_output() allocates the codec array with av_calloc() and the
 *    container is a muxer, released with avio_closep() +
 *    avformat_free_context().
 * The container's oformat pointer tells the two apart.
 */
int free_file(file* f)
{
    if (!f)
        return 0;

    if (f->container)
    {
        const int is_output = f->container->oformat != NULL;

        if (f->codec)
        {
            for (unsigned int i = 0; i < f->container->nb_streams; i++)
            {
                if (f->codec[i] != NULL)
                    avcodec_free_context(&f->codec[i]);
            }
        }

        if (is_output)
        {
            /* Only close the AVIO context when the muxer uses one. */
            if (!(f->container->oformat->flags & AVFMT_NOFILE))
                avio_closep(&f->container->pb);
            avformat_free_context(f->container);
            f->container = NULL;
            av_free(f->codec);
        }
        else
        {
            avformat_close_input(&f->container);
            free(f->codec);
        }
        f->codec = NULL;
    }

    /* free(NULL) is a no-op, so inputs (which have no counters) are fine. */
    free(f->frames);
    free(f->packets);
    free(f);

    return 0;
}

/*
 * Opens `input_path` for reading and opens one decoder per stream, storing
 * them in video->codec (allocated with calloc, indexed by stream index).
 *
 * video_codec / audio_codec: optional decoder names for video / audio
 * streams; NULL selects the default decoder for the stream's codec id.
 * Streams of any other type always use the default decoder.
 *
 * Returns 0 on success, 1 on failure. On failure `video` may be left
 * partially initialised.
 */
int open_media(file* video, const char input_path[], const char* video_codec, const char* audio_codec)
{
    video->container = avformat_alloc_context();

    if (!video->container)
    {
        printf("Failed to alloc memory to the container of the input file");
        return 1;
    }
    if (avformat_open_input(&video->container, input_path, NULL, NULL) != 0)
    {
        printf("Failed to open input file");
        return 1;
    }
    if (avformat_find_stream_info(video->container, NULL) < 0)
    {
        printf("Failed to open read stream info");
        return 1;
    }

    video->codec = calloc(video->container->nb_streams, sizeof(AVCodecContext*));
    if (!video->codec)
    {
        printf("failed to alloc memory for codec context");
        return 1;
    }

    for (unsigned int i = 0; i < video->container->nb_streams; i++)
    {
        AVStream* stream = video->container->streams[i];
        const char* curr_codec = NULL;
        const AVCodec* dec;

        if (AVMEDIA_TYPE_VIDEO == stream->codecpar->codec_type)
        {
            curr_codec = video_codec;
        }
        else if (AVMEDIA_TYPE_AUDIO == stream->codecpar->codec_type)
        {
            curr_codec = audio_codec;
        }

        /* Look up the name chosen for THIS stream type; using video_codec
         * here would ignore audio_codec and apply the video decoder name
         * to audio streams. */
        if (curr_codec == NULL)
            dec = avcodec_find_decoder(stream->codecpar->codec_id);
        else
            dec = avcodec_find_decoder_by_name(curr_codec);

        if (!dec)
        {
            printf("failed to find the codec");
            return 1;
        }

        AVCodecContext* codec_ctx = avcodec_alloc_context3(dec);
        if (!codec_ctx)
        {
            printf("failed to alloc memory for codec context");
            return 1;
        }

        if (avcodec_parameters_to_context(codec_ctx, stream->codecpar) < 0)
        {
            printf("failed to fill codec context");
            avcodec_free_context(&codec_ctx); /* not stored yet: free it here */
            return 1;
        }

        if (avcodec_open2(codec_ctx, dec, NULL) < 0)
        {
            printf("failed to open codec");
            avcodec_free_context(&codec_ctx);
            return 1;
        }

        video->codec[i] = codec_ctx;
    }
    return 0;
}

/*
    returns:
    1 if error
    0 if success
    -1 if file ended
*/
/*
 * Produces the next decoded frame of any stream.
 *
 * Packets are read and sent to their stream's decoder until one decoder
 * returns a frame; packet->stream_index then identifies the stream the
 * frame belongs to. Once av_read_frame() fails (end of file, or any read
 * error, which is treated the same way), each decoder is switched to
 * draining mode so the frames it still buffers are delivered too instead
 * of being lost at the end of every clip.
 *
 * Returns 0 with a frame in `frame`, 1 on a decoder error, -1 when the
 * file has ended and every decoder is empty. With frame == NULL nothing is
 * drained.
 */
int decode_frame(file* decoder, AVFrame* frame, AVPacket* packet)
{
    while (1)
    {
        av_packet_unref(packet);
        if (av_read_frame(decoder->container, packet) < 0)
            break;

        AVCodecContext* dec = decoder->codec[packet->stream_index];

        if (avcodec_send_packet(dec, packet) < 0)
        {
            printf("Error while sending packet to decoder");
            return 1;
        }

        /* One receive attempt per packet: a frame ends the call, EAGAIN/EOF
         * means the decoder needs more input, anything else is an error. */
        int response = avcodec_receive_frame(dec, frame);
        if (response >= 0)
        {
            return 0;
        }
        if (response != AVERROR(EAGAIN) && response != AVERROR_EOF)
        {
            printf("Error while receiving frame from decoder");
            return 1;
        }
    }

    if (!frame)
        return -1;

    /* Drain. Sending a NULL packet starts draining; repeating it on later
     * calls just returns AVERROR_EOF, so no state is kept between calls.
     * Any failure to receive means this decoder has nothing more to give. */
    for (unsigned int i = 0; i < decoder->container->nb_streams; i++)
    {
        AVCodecContext* dec = decoder->codec[i];
        if (!dec)
            continue;

        avcodec_send_packet(dec, NULL);
        if (avcodec_receive_frame(dec, frame) >= 0)
        {
            /* The packet is blank here; tell the caller which stream
             * this frame came from. */
            packet->stream_index = (int)i;
            return 0;
        }
    }

    return -1;
}
/*
 * Opens the audio encoder named `encoder`, adds a matching stream to
 * `container` and stores the opened context in cod_ctx[0].
 *
 * The encoder's first supported sample format is used and the time base is
 * 1/sample_rate. Returns 0 on success, 1 on failure; on failure the context
 * is freed and cod_ctx[0] is left untouched.
 */
int create_audio_encoder(AVCodecContext** cod_ctx, AVFormatContext* container, const char* encoder,
    int channels, int sample_rate, int bit_rate)
{
    const AVCodec* enc = avcodec_find_encoder_by_name(encoder);
    if (!enc)
    {
        printf("CREATE AUDIO ENCODER: Failed searching encoder");

        return 1;
    }
    if (!enc->sample_fmts)
    {
        /* No advertised sample formats: nothing to pick the first one from. */
        printf("CREATE AUDIO ENCODER: encoder lists no sample formats");
        return 1;
    }

    AVCodecContext* ctx = avcodec_alloc_context3(enc);
    if (!ctx)
    {
        printf("CREATE AUDIO ENCODER: Failed allocation codec context");
        return 1;
    }

    ctx->channels = channels;
    ctx->channel_layout = av_get_default_channel_layout(channels);
    ctx->sample_rate = sample_rate;
    ctx->sample_fmt = enc->sample_fmts[0];
    ctx->bit_rate = bit_rate;
    ctx->time_base = (AVRational){ 1, sample_rate }; // e.g. 1/48000

    /* Containers such as MP4 keep codec headers in the file header; the
     * encoder must be told so BEFORE it is opened. */
    if (container->oformat->flags & AVFMT_GLOBALHEADER)
        ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (avcodec_open2(ctx, enc, NULL) < 0)
    {
        printf("CREATE AUDIO ENCODER: couldn't open codec");
        avcodec_free_context(&ctx);
        return 1;
    }

    /* The stream is created only once the encoder is known to work. */
    AVStream* stream = avformat_new_stream(container, NULL);
    if (!stream)
    {
        printf("CREATE AUDIO ENCODER: Failed allocating memory for stream");
        avcodec_free_context(&ctx);
        return 1;
    }

    /* Time-base hint for the muxer. */
    stream->time_base = ctx->time_base;

    if (avcodec_parameters_from_context(stream->codecpar, ctx) < 0)
    {
        printf("CREATE AUDIO ENCODER: failed setting codec parameters from context");
        avcodec_free_context(&ctx);
        return 1;
    }

    cod_ctx[0] = ctx;
    return 0;
}

/*
 * Opens the video encoder named `encoder` with the "fast" preset, adds a
 * matching stream to `container` and stores the opened context in
 * cod_ctx[0].
 *
 * The time base is fixed at 1/60000 rather than 1/frame_rate: a coarse
 * video time base leaves too little resolution to keep audio in sync.
 * Returns 0 on success, 1 on failure; on failure the context is freed and
 * cod_ctx[0] is left untouched.
 */
int create_video_encoder(AVCodecContext** cod_ctx, AVFormatContext* container, const char* encoder, int width, int height,
    int pix_fmt, AVRational sample_aspect_ratio, AVRational frame_rate, int bit_rate, int buffer_size)
{
    const AVCodec* enc = avcodec_find_encoder_by_name(encoder);
    if (!enc)
    {
        printf("CREATE VIDEO ENCODER: Failed searching encoder");

        return 1;
    }

    AVCodecContext* ctx = avcodec_alloc_context3(enc);
    if (!ctx)
    {
        printf("CREATE VIDEO ENCODER: Failed allocation codec context");
        return 1;
    }

    ctx->height = height;
    ctx->width = width;
    ctx->pix_fmt = pix_fmt;
    ctx->sample_aspect_ratio = sample_aspect_ratio;
    ctx->time_base = av_make_q(1, 60000);
    ctx->framerate = frame_rate;
    ctx->bit_rate = bit_rate;

    /* NOTE(review): min/max rate are set from buffer_size, not bit_rate —
     * kept as in the original; confirm this is intended. */
    ctx->rc_buffer_size = buffer_size;
    ctx->rc_max_rate = buffer_size;
    ctx->rc_min_rate = buffer_size;

    /* Containers such as MP4 keep codec headers in the file header; the
     * encoder must be told so BEFORE it is opened. */
    if (container->oformat->flags & AVFMT_GLOBALHEADER)
        ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    if (av_opt_set(ctx->priv_data, "preset", "fast", 0) != 0)
    {
        printf("CREATE VIDEO ENCODER: Failed opt set");
        avcodec_free_context(&ctx);
        return 1;
    }

    if (avcodec_open2(ctx, enc, NULL) < 0)
    {
        printf("CREATE VIDEO ENCODER: couldn't open codec");
        avcodec_free_context(&ctx);
        return 1;
    }

    /* The stream is created only once the encoder is known to work. */
    AVStream* stream = avformat_new_stream(container, NULL);
    if (!stream)
    {
        printf("CREATE VIDEO ENCODER: Failed allocating memory for stream");
        avcodec_free_context(&ctx);
        return 1;
    }

    stream->time_base = ctx->time_base;

    if (avcodec_parameters_from_context(stream->codecpar, ctx) < 0)
    {
        printf("CREATE VIDEO ENCODER: failed setting codec parameters from context");
        avcodec_free_context(&ctx);
        return 1;
    }

    cod_ctx[0] = ctx;
    return 0;
}

/*
 * Creates the output file `path` with one encoder per stream of `input`,
 * mirroring the input's audio/video parameters, then opens the file and
 * writes the container header.
 *
 * video_encoder / audio_encoder: encoder names; NULL falls back to the name
 * of the input stream's codec descriptor (and that name is then reused for
 * later streams of the same type).
 *
 * Returns the ready-to-use output, or NULL on failure (everything allocated
 * so far is released through free_file()). Inputs containing a stream that
 * is neither audio nor video are rejected, because the rest of the program
 * assumes input and output stream indices match one to one.
 */
file* start_output_from_file(const char* path, file* input, const char* video_encoder, const char* audio_encoder)
{
    const unsigned int nb_streams = input->container->nb_streams;

    file* output = create_output(nb_streams, path);
    if (!output)
    {
        return NULL;
    }

    output->frames = calloc(nb_streams, sizeof(int));
    output->packets = calloc(nb_streams, sizeof(int));
    if (!output->frames || !output->packets)
    {
        printf("Failed allocating ram for counters\n");
        free_file(output);
        return NULL;
    }

    for (int stream = 0; stream < (int)nb_streams; stream++)
    {
        AVCodecContext* codec_ctx = input->codec[stream];
        int res;

        switch (codec_ctx->codec_type)
        {
        case AVMEDIA_TYPE_AUDIO:
            if (audio_encoder == NULL)
            {
                audio_encoder = codec_ctx->codec_descriptor->name;
            }
            res = create_audio_encoder(&output->codec[stream], output->container, audio_encoder, codec_ctx->channels, codec_ctx->sample_rate, (int)codec_ctx->bit_rate);
            break;

        case AVMEDIA_TYPE_VIDEO:
        {
            if (video_encoder == NULL)
            {
                video_encoder = codec_ctx->codec_descriptor->name;
            }
            AVRational framerate = av_guess_frame_rate(input->container, input->container->streams[stream], NULL);
            res = create_video_encoder(&output->codec[stream], output->container, video_encoder, codec_ctx->width, codec_ctx->height,
                codec_ctx->sw_pix_fmt, (AVRational) { 1, 1 }, framerate, (int)codec_ctx->bit_rate, codec_ctx->rc_buffer_size);
            break;
        }

        default:
            /* Previously `res` was read without being set on this path. */
            res = 1;
            break;
        }

        if (res != 0)
        {
            printf("Failed opening encoder stream number %i \n", stream);
            free_file(output);
            return NULL;
        }
    }

    /* AV_CODEC_FLAG_GLOBAL_HEADER is a codec-context flag: OR-ing it into
     * AVFormatContext.flags (as was done here) has no defined meaning and
     * comes too late for already-opened encoders, so that line is dropped. */

    if (!(output->container->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&output->container->pb, path, AVIO_FLAG_WRITE) < 0)
        {
            printf("could not open the output file");
            free_file(output);
            return NULL;
        }
    }

    AVDictionary* muxer_opts = NULL;

    if (avformat_write_header(output->container, &muxer_opts) < 0)
    {
        printf("an error occurred when opening output file");
        free_file(output);
        return NULL;
    }

    return output;
}

/*
 * Allocates an empty output file: a muxer context whose format is guessed
 * from `filename`, and a zeroed array of `streams` codec-context slots
 * (allocated with av_calloc). The frames/packets counters are left NULL.
 *
 * Returns the new file, or NULL on failure with nothing left allocated.
 */
file* create_output(int streams, const char* filename)
{
    file* output = calloc(1, sizeof *output);
    if (!output)
    {
        return NULL;
    }

    if (avformat_alloc_output_context2(&output->container, NULL, NULL, filename) < 0)
    {
        printf("Failed opening output\n");
        free(output);
        return NULL;
    }

    /* av_calloc() zero-fills, so every slot already starts out as NULL. */
    output->codec = av_calloc(streams, sizeof(AVCodecContext*));

    if (!output->codec)
    {
        printf("Failed allocating ram for codec\n");
        avformat_free_context(output->container);
        free(output);
        return NULL;
    }

    return output;
}

使用 ffmpeg c 连接视频和音频时如何计算 pts 和 dts

问题描述

1 个解决方案

解决方案1
0 2021-12-23 22:58:02

使用 ffmpeg c 连接视频和音频时如何计算 pts 和 dts

问题描述

1 个解决方案

解决方案1 0 2021-12-23 22:58:02

解决方案1
0 2021-12-23 22:58:02