碎片化MP4 - 在浏览器中播放问题

Question

我尝试用原始H264视频数据创建碎片化的MP4，这样我就可以在网页浏览器的播放器中播放它了。 我的目标是创建实时流媒体系统，由媒体服务器将碎片化的MP4片段发送到浏览器。 服务器将缓冲来自RaspberryPi摄像头的输入数据，该摄像头以H264帧的形式发送视频。 然后服务器对该视频数据进行复用（mux），并使其可供客户端使用。 浏览器将使用Media Source Extensions播放媒体数据（由服务器复用并通过websocket发送）。

出于测试目的，我编写了以下代码片段（参考了我在互联网上找到的许多示例）：

使用avcodec的C ++应用程序，它将原始H264视频复用到碎片MP4并将其保存到文件中：

#define READBUFSIZE 4096
#define IOBUFSIZE 4096
#define ERRMSGSIZE 128

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

extern "C"
{
    #include <libavformat/avformat.h>
    #include <libavutil/error.h>
    #include <libavutil/opt.h>
}

// H.264 NAL unit types this program searches for. The value is compared
// against the low five bits (0x1F mask) of the byte following a start code.
enum NalType : uint8_t
{
    SEQ_PARAM_SET = 7, // sequence parameter set (SPS)
    PIC_PARAM_SET = 8  // picture parameter set (PPS)
};

// In-memory sink that accumulates every byte handed to mediaMuxCallback().
std::vector<uint8_t> outputData;

/**
 * Write callback: appends the bytes [buf, buf + bufSize) to outputData.
 *
 * @param opaque  user pointer supplied by the caller; not used here
 * @param buf     start of the chunk to store
 * @param bufSize number of bytes in the chunk
 * @return bufSize, i.e. the whole chunk is always reported as written
 */
int mediaMuxCallback(void *opaque, uint8_t *buf, int bufSize)
{
    (void)opaque;

    const uint8_t *const chunkBegin = buf;
    const uint8_t *const chunkEnd = buf + bufSize;
    outputData.insert(outputData.end(), chunkBegin, chunkEnd);

    return bufSize;
}

std::string getAvErrorString(int errNr)
{
    char errMsg[ERRMSGSIZE];
    av_strerror(errNr, errMsg, ERRMSGSIZE);
    return std::string(errMsg);
}

int main(int argc, char **argv)
{
    if(argc < 2)
    {
        std::cout << "Missing file name" << std::endl;
        return 1;
    }

    std::fstream file(argv[1], std::ios::in | std::ios::binary);
    if(!file.is_open())
    {
        std::cout << "Couldn't open file " << argv[1] << std::endl;
        return 2;
    }

    std::vector<uint8_t> inputMediaData;
    do
    {
        char buf[READBUFSIZE];
        file.read(buf, READBUFSIZE);

        int size = file.gcount();
        if(size > 0)
            inputMediaData.insert(inputMediaData.end(), buf, buf + size);
    } while(!file.eof());
    file.close();

    //Initialize avcodec
    av_register_all();
    uint8_t *ioBuffer;
    AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *codecCtxt = avcodec_alloc_context3(codec);
    AVCodecParserContext *parserCtxt = av_parser_init(AV_CODEC_ID_H264);
    AVOutputFormat *outputFormat = av_guess_format("mp4", nullptr, nullptr);
    AVFormatContext *formatCtxt;
    AVIOContext *ioCtxt;
    AVStream *videoStream;

    int res = avformat_alloc_output_context2(&formatCtxt, outputFormat, nullptr, nullptr);
    if(res < 0)
    {
        std::cout << "Couldn't initialize format context; the error was: " << getAvErrorString(res) << std::endl;
        return 3;
    }

    if((videoStream = avformat_new_stream( formatCtxt, avcodec_find_encoder(formatCtxt->oformat->video_codec) )) == nullptr)
    {
        std::cout << "Couldn't initialize video stream" << std::endl;
        return 4;
    }
    else if(!codec)
    {
        std::cout << "Couldn't initialize codec" << std::endl;
        return 5;
    }
    else if(codecCtxt == nullptr)
    {
        std::cout << "Couldn't initialize codec context" << std::endl;
        return 6;
    }
    else if(parserCtxt == nullptr)
    {
        std::cout << "Couldn't initialize parser context" << std::endl;
        return 7;
    }
    else if((ioBuffer = (uint8_t*)av_malloc(IOBUFSIZE)) == nullptr)
    {
        std::cout << "Couldn't allocate I/O buffer" << std::endl;
        return 8;
    }
    else if((ioCtxt = avio_alloc_context(ioBuffer, IOBUFSIZE, 1, nullptr, nullptr, mediaMuxCallback, nullptr)) == nullptr)
    {
        std::cout << "Couldn't initialize I/O context" << std::endl;
        return 9;
    }

    //Set video stream data
    videoStream->id = formatCtxt->nb_streams - 1;
    videoStream->codec->width = 1280;
    videoStream->codec->height = 720;
    videoStream->time_base.den = 60; //FPS
    videoStream->time_base.num = 1;
    videoStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    formatCtxt->pb = ioCtxt;

    //Retrieve SPS and PPS for codec extdata
    const uint32_t synchMarker = 0x01000000;
    unsigned int i = 0;
    int spsStart = -1, ppsStart = -1;
    uint16_t spsSize = 0, ppsSize = 0;
    while(spsSize == 0 || ppsSize == 0)
    {
        uint32_t *curr =  (uint32_t*)(inputMediaData.data() + i);
        if(*curr == synchMarker)
        {
            unsigned int currentNalStart = i;
            i += sizeof(uint32_t);
            uint8_t nalType = inputMediaData.data()[i] & 0x1F;
            if(nalType == SEQ_PARAM_SET)
                spsStart = currentNalStart;
            else if(nalType == PIC_PARAM_SET)
                ppsStart = currentNalStart;

            if(spsStart >= 0 && spsSize == 0 && spsStart != i)
                spsSize = currentNalStart - spsStart;
            else if(ppsStart >= 0 && ppsSize == 0 && ppsStart != i)
                ppsSize = currentNalStart - ppsStart;
        }
        ++i;
    }

    videoStream->codec->extradata = inputMediaData.data() + spsStart;
    videoStream->codec->extradata_size = ppsStart + ppsSize;

    //Write main header
    AVDictionary *options = nullptr;
    av_dict_set(&options, "movflags", "frag_custom+empty_moov", 0);
    res = avformat_write_header(formatCtxt, &options);
    if(res < 0)
    {
        std::cout << "Couldn't write container main header; the error was: " << getAvErrorString(res) << std::endl;
        return 10;
    }

    //Retrieve frames from input video and wrap them in container
    int currentInputIndex = 0;
    int framesInSecond = 0;
    while(currentInputIndex < inputMediaData.size())
    {
        uint8_t *frameBuffer;
        int frameSize;
        res = av_parser_parse2(parserCtxt, codecCtxt, &frameBuffer, &frameSize, inputMediaData.data() + currentInputIndex,
            inputMediaData.size() - currentInputIndex, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
        if(frameSize == 0) //No more frames while some data still remains (is that even possible?)
        {
            std::cout << "Some data left unparsed: " << std::to_string(inputMediaData.size() - currentInputIndex) << std::endl;
            break;
        }

        //Prepare packet with video frame to be dumped into container
        AVPacket packet;
        av_init_packet(&packet);
        packet.data = frameBuffer;
        packet.size = frameSize;
        packet.stream_index = videoStream->index;
        currentInputIndex += frameSize;

        //Write packet to the video stream
        res = av_write_frame(formatCtxt, &packet);
        if(res < 0)
        {
            std::cout << "Couldn't write packet with video frame; the error was: " << getAvErrorString(res) << std::endl;
            return 11;
        }

        if(++framesInSecond == 60) //We want 1 segment per second
        {
            framesInSecond = 0;
            res = av_write_frame(formatCtxt, nullptr); //Flush segment
        }
    }
    res = av_write_frame(formatCtxt, nullptr); //Flush if something has been left

    //Write media data in container to file
    file.open("my_mp4.mp4", std::ios::out | std::ios::binary);
    if(!file.is_open())
    {
        std::cout << "Couldn't open output file " << std::endl;
        return 12;
    }

    file.write((char*)outputData.data(), outputData.size());
    if(file.fail())
    {
        std::cout << "Couldn't write to file" << std::endl;
        return 13;
    }

    std::cout << "Media file muxed successfully" << std::endl;
    return 0;
}

（我硬编码了一些值，例如视频尺寸或帧速率，但正如我所说，这只是一个测试代码。）

使用MSE的简单HTML网页播放我的碎片MP4

<!DOCTYPE html>
<html>
<head>
    <title>Test strumienia</title>
</head>
<body>
    <video width="1280" height="720" controls>
    </video>
</body>
<script>
// The page's single <video> element; sourceOpen() reads this global as well.
var vidElement = document.querySelector('video');

// Without Media Source Extensions there is nothing to set up: just report it.
if (!window.MediaSource) {
  console.log("The Media Source Extensions API is not supported.");
} else {
  // Point the player at a fresh MediaSource and wait for it to open.
  var mediaSource = new MediaSource();
  vidElement.src = URL.createObjectURL(mediaSource);
  mediaSource.addEventListener('sourceopen', sourceOpen);
}

/**
 * 'sourceopen' handler: creates an H.264/MP4 SourceBuffer on the opened
 * MediaSource, downloads 'my_mp4.mp4' in one piece, appends it, and ends the
 * stream once the append has finished.
 *
 * @param {Event} e - the sourceopen event; e.target is the MediaSource.
 */
function sourceOpen(e) {
  // The object URL is no longer needed once the source has been opened.
  URL.revokeObjectURL(vidElement.src);

  var openedSource = e.target;
  var videoBuffer = openedSource.addSourceBuffer('video/mp4; codecs="avc1.640028"');

  // Close the stream as soon as the buffer is idle and the source is still open.
  function finishStream(e) {
    if (!videoBuffer.updating && openedSource.readyState === 'open') {
      openedSource.endOfStream();
    }
  }

  // Hand the complete file to the SourceBuffer in a single append.
  function appendWholeFile(mediaBytes) {
    videoBuffer.addEventListener('updateend', finishStream);
    videoBuffer.appendBuffer(mediaBytes);
  }

  fetch('my_mp4.mp4')
    .then(function(response) {
      return response.arrayBuffer();
    })
    .then(appendWholeFile);
}
</script>
</html>

我的C ++应用程序生成的输出MP4文件可以在MPC中播放，但它不能在我测试过的任何Web浏览器中播放。 它也没有任何持续时间（MPC保持显示00:00）。

为了比较我从上面描述的C ++应用程序获得的输出MP4文件，我还使用FFMPEG从原始H264流的相同源文件创建碎片化的MP4文件。 我使用以下命令：

ffmpeg -r 60 -i input.h264 -c:v copy -f mp4 -movflags empty_moov+default_base_moof+frag_keyframe test.mp4

FFMPEG生成的此文件由我用于测试的每个Web浏览器正确播放。 它也有正确的持续时间（但它也有尾随原子，无论如何都不会出现在我的直播中，因为我需要一个直播流，它首先不会有任何固定的持续时间）。

两个文件的MP4原子看起来非常相似（肯定有相同的avcC部分）。 有趣的是（但不确定这是否重要），两个文件的NAL格式都与输入文件不同（RPI摄像头以Annex B格式生成视频流，而输出的MP4文件包含AVCC格式的NAL……或者至少在我将mdat原子与输入的H264数据进行比较时看起来是这样）。

我假设我需要为avcodec设置一些字段（或几个字段），以使其生成可由浏览器播放器正确解码和播放的视频流。 但是我需要设置哪些字段？ 或者问题可能在其他地方？ 我没有想法了。

编辑1：正如所建议的那样，我使用十六进制编辑器调查了两个MP4文件的二进制内容（由我的应用程序和FFMPEG工具生成）。 我能确认的是：

两个文件都有相同的avcc部分（它们完全匹配并且采用AVCC格式，我逐字节地分析它并且没有错误）
两个文件都有AVCC格式的NAL（我仔细查看了mdat原子，它们在两个MP4文件之间没有差异）

所以我猜我的代码中的extradata创建没有任何问题 - avcodec正确地处理它，即使我只是用SPS和PPS NAL提供它。 它自己转换它们，所以我不需要手工完成它。 不过，我原来的问题仍然存在。

编辑2：我取得了部分成功 - 我的应用程序生成的MP4现在可以在Firefox中播放。 我将此行添加到代码中（以及其他流初始化）：

videoStream->codec->time_base = videoStream->time_base;

所以现在我的代码的这一部分看起来像这样：

//Set video stream data
videoStream->id = formatCtxt->nb_streams - 1;
videoStream->codec->width = 1280;
videoStream->codec->height = 720;
videoStream->time_base.den = 60; //FPS
videoStream->time_base.num = 1;
videoStream->codec->time_base = videoStream->time_base;
videoStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
formatCtxt->pb = ioCtxt;

Answer 1

我终于找到了解决方案。 我的MP4现在可以在Chrome中播放（同时还在其他经过测试的浏览器中播放）。

Chrome的 chrome://media-internals/ 页面会显示MSE日志（算是吧）。 当我查看那里时，我发现了一些针对我的测试播放器的警告：

ISO-BMFF container metadata for video frame indicates that the frame is not a keyframe, but the video frame contents indicate the opposite.

这促使我思考，并让我想到要为包含关键帧的数据包设置AV_PKT_FLAG_KEY。 我将以下代码添加到填充AVPacket结构的部分：

    //Check if keyframe field needs to be set.
    //Looks at the first NALs of the packet (4-byte start codes only) and marks the packet
    //as a keyframe when one of them has the KEYFRAME NAL type.
    int allowedNalsCount = 3; //In one packet there would be at most three NALs: SPS, PPS and video frame
    const int startCodeSize = 4;
    packet.flags = 0;
    //The bound keeps both the start code and the NAL header byte after it inside the packet;
    //bytes are compared individually, so no unaligned or endian-dependent 32-bit load is needed.
    for(int i = 0; i + startCodeSize < frameSize && allowedNalsCount > 0; ++i)
    {
        const bool atStartCode = frameBuffer[i] == 0x00 && frameBuffer[i + 1] == 0x00
            && frameBuffer[i + 2] == 0x00 && frameBuffer[i + 3] == 0x01;
        if(!atStartCode)
            continue;

        const uint8_t nalType = frameBuffer[i + startCodeSize] & 0x1F;
        if(nalType == KEYFRAME)
        {
            std::cout << "Keyframe detected at frame nr " << framesTotal << std::endl;
            packet.flags |= AV_PKT_FLAG_KEY;
            break;
        }

        i += startCodeSize + 1; //We parsed this already, no point in doing it again
        --allowedNalsCount;
    }

在我的情况下， KEYFRAME常量结果为0x5 （Slice IDR）。

Answer 2

两个文件的MP4原子看起来非常相似（肯定会有相同的avcc部分）

请再仔细检查一下，在我看来，你提供的代码表明情况并非如此。

有趣的是（但不确定它是否有任何重要性），两个文件都具有与输入文件不同的NAL格式（RPI摄像机以附件B格式生成视频流，而输出MP4文件包含AVCC格式的NAL ......或者至少它看起来就像我将mdat原子与输入H264数据进行比较一样。

这一点非常重要，MP4不能使用Annex B格式。

Answer 3

您需要使用AVC解码器配置记录填写extradata，而不仅仅是SPS / PPS

这是记录的样子： AVCDCR

Answer 4

我们可以在 [Chrome 源代码](https://chromium.googlesource.com/chromium/src/+/refs/heads/master/media/formats/mp4/mp4_stream_parser.cc#799) 中找到相关解释：

// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.


  // Use |analysis.is_keyframe|, if it was actually determined, for logging
  // if the analysis mismatches the container's keyframe metadata for
  // |frame_buf|.
  if (analysis.is_keyframe.has_value() &&
      is_keyframe != analysis.is_keyframe.value()) {
    LIMITED_MEDIA_LOG(DEBUG, media_log_, num_video_keyframe_mismatches_,
                      kMaxVideoKeyframeMismatchLogs)
        << "ISO-BMFF container metadata for video frame indicates that the "
           "frame is "
        << (is_keyframe ? "" : "not ")
        << "a keyframe, but the video frame contents indicate the "
           "opposite.";
    // As of September 2018, it appears that all of Edge, Firefox, Safari
    // work with content that marks non-avc-keyframes as a keyframe in the
    // container. Encoders/muxers/old streams still exist that produce
    // all-keyframe mp4 video tracks, though many of the coded frames are
    // not keyframes (likely workaround due to the impact on low-latency
    // live streams until https://crbug.com/229412 was fixed).  We'll trust
    // the AVC frame's keyframe-ness over the mp4 container's metadata if
    // they mismatch. If other out-of-order codecs in mp4 (e.g. HEVC, DV)
    // implement keyframe analysis in their frame_bitstream_converter, we'll
    // similarly trust that analysis instead of the mp4.
    is_keyframe = analysis.is_keyframe.value();
  }

正如代码注释所示，当两者不一致时，Chrome更信任AVC帧本身的关键帧属性，而不是MP4容器的元数据。 因此，H264 / HEVC中的NALU类型比MP4容器中sdtp和trun盒子的描述更重要。

碎片化MP4 - 在浏览器中播放问题

问题描述

4 个解决方案

解决方案1
2 已采纳 2019-01-14 19:41:50

解决方案2
0 2019-01-10 03:58:39

解决方案3
0 2019-01-10 10:21:59

解决方案4
0 2019-06-12 01:59:47

碎片化MP4 - 在浏览器中播放问题

问题描述

4 个解决方案

解决方案1 2 已采纳 2019-01-14 19:41:50

解决方案2 0 2019-01-10 03:58:39

解决方案3 0 2019-01-10 10:21:59

解决方案4 0 2019-06-12 01:59:47

解决方案1
2 已采纳 2019-01-14 19:41:50

解决方案2
0 2019-01-10 03:58:39

解决方案3
0 2019-01-10 10:21:59

解决方案4
0 2019-06-12 01:59:47