Android MediaMuxer crash due to supposed out of order frames

I want to record an audio track from the microphone (and, down the road, from a Bluetooth headset microphone) and convert it to MPEG-4 AAC format. Per my project's backend communication spec, the audio has to be split into short (0.5 to 2 second) chunks. As a simplified example, in the provided code I just save these chunks as files in the cache (without sending them to the backend).

To do this, I record audio in PCM-16 format with AudioRecord, convert it to AAC with MediaCodec, and finally save it as an MPEG-4 file with MediaMuxer.

Sample code (based on this example):

import android.content.Context
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaCodec
import android.media.MediaCodec.BUFFER_FLAG_END_OF_STREAM
import android.media.MediaCodecInfo.CodecProfileLevel
import android.media.MediaCodecList
import android.media.MediaFormat
import android.media.MediaMuxer
import android.media.MediaRecorder
import kotlinx.coroutines.CoroutineDispatcher
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.delay
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import timber.log.Timber
import java.io.File

private const val TAG = "RECORDING"

private const val AUDIO_CHUNK_LENGTH_MS = 500L

private const val RECORDER_SAMPLERATE = 44100
private const val RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO
private const val RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT
private val BUFFER_SIZE = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE, RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING)

class RecordingUtil2(
    val context: Context,
    private val dispatcher: CoroutineDispatcher = Dispatchers.Default
) {

    lateinit var audioRecord: AudioRecord
    lateinit var encoder: MediaCodec
    lateinit var mediaMuxer: MediaMuxer
    var trackId: Int = 0

    var chunkCuttingJob: Job? = null
    var recordingJob: Job? = null

    var audioStartTimeNs: Long = 0

    var currentFile: File? = null

    var chunkEnd = false

    private fun prepareRecorder() {
        audioRecord = AudioRecord(
            MediaRecorder.AudioSource.MIC, RECORDER_SAMPLERATE,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT, BUFFER_SIZE * 10
        )
    }

    private suspend fun startRecording() {
        Timber.tag(TAG).i("started recording, buffer size $BUFFER_SIZE")
        createTempFile()
        prepareRecorder()

        try {
            encoder = createMediaCodec(BUFFER_SIZE)
            encoder.start()
            createMuxer(encoder.outputFormat, currentFile!!)
            mediaMuxer.start()
        } catch (exception: Exception) {
            Timber.tag(TAG).w(exception)
        }

        audioStartTimeNs = System.nanoTime()
        audioRecord.startRecording()

        var bufferInfo = MediaCodec.BufferInfo()

        chunkCuttingJob = CoroutineScope(dispatcher).launch {
            while (isActive) {
                delay(AUDIO_CHUNK_LENGTH_MS)
                cutChunk()
            }
        }
        recordingJob = CoroutineScope(dispatcher).launch {
            val buffer2 = ByteArray(BUFFER_SIZE)

            do {
                val bytes = audioRecord.read(buffer2, 0, BUFFER_SIZE)

                if (bytes != BUFFER_SIZE) {
                    Timber.tag(TAG).w("read less bytes than full buffer ($bytes/$BUFFER_SIZE)")
                }

                encodeRawAudio(encoder, mediaMuxer, buffer2, bytes, bufferInfo, !isActive || chunkEnd)

                if (chunkEnd) {
                    recreateEncoderAndMuxer()
                    bufferInfo = MediaCodec.BufferInfo()
                    // delay here causes crash after first cut
                    //delay(100)
                }
                // delay here fixes crash in most cases
                //delay(100)
            } while(isActive)
        }
    }

    private fun recreateEncoderAndMuxer() {
        createTempFile()
        chunkEnd = false
        audioStartTimeNs = System.nanoTime()
        encoder.stop()
        encoder.release()
        encoder = createMediaCodec(BUFFER_SIZE)
        encoder.start()
        mediaMuxer.stop()
        mediaMuxer.release()
        createMuxer(encoder.outputFormat, currentFile!!)
        mediaMuxer.start()
    }

    private fun encodeRawAudio(encoder: MediaCodec, muxer: MediaMuxer, bytes: ByteArray, byteCount: Int, bufferInfo: MediaCodec.BufferInfo, last: Boolean = false) {
        with(encoder) {
            val inputBufferIndex = dequeueInputBuffer(10_000)
            val inputBuffer = getInputBuffer(inputBufferIndex)
            inputBuffer?.clear()
            inputBuffer?.put(bytes)
            val presentationTimeUs: Long = (System.nanoTime() - audioStartTimeNs) / 1000

            queueInputBuffer(inputBufferIndex, 0, byteCount, presentationTimeUs, if (last) BUFFER_FLAG_END_OF_STREAM else 0)

            var outputBufferIndex = dequeueOutputBuffer(bufferInfo, 0)
            Timber.tag(TAG).d("encoding $byteCount bytes, last = $last, time: $presentationTimeUs, buffer time: ${bufferInfo.presentationTimeUs}")

            while (outputBufferIndex != MediaCodec.INFO_TRY_AGAIN_LATER) {
                if (outputBufferIndex >= 0) {
                    val outputBuffer = getOutputBuffer(outputBufferIndex)

                    outputBuffer?.position(bufferInfo.offset)
                    outputBuffer?.limit(bufferInfo.offset + bufferInfo.size)

                    if (bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG != MediaCodec.BUFFER_FLAG_CODEC_CONFIG) {
                        val data = ByteArray(outputBuffer!!.remaining())
                        outputBuffer.get(data)

                        muxer.writeSampleData(trackId, outputBuffer, bufferInfo)
                    }

                    outputBuffer?.clear()
                    releaseOutputBuffer(outputBufferIndex, false)
                }

                outputBufferIndex = encoder.dequeueOutputBuffer(bufferInfo, 0)
            }
        }
    }

    private fun cutChunk() {
        Timber.tag(TAG).i("cutting chunk")
        chunkEnd = true
    }

    private fun stopRecording() {
        Timber.tag(TAG).i("stopped recording")
        chunkCuttingJob?.cancel()
        chunkCuttingJob = null
        recordingJob?.cancel()
        recordingJob = null
        audioRecord.stop()
        encoder.release()
        mediaMuxer.stop()
        mediaMuxer.release()
    }

    suspend fun record(isRecording: Boolean) {
        if (isRecording) {
            startRecording()
        } else {
            stopRecording()
        }
    }

    private fun createMediaCodec(bufferSize: Int, existing: MediaCodec? = null): MediaCodec {
        val mediaFormat = MediaFormat().apply {
            setString(MediaFormat.KEY_MIME, MediaFormat.MIMETYPE_AUDIO_AAC)
            setInteger(MediaFormat.KEY_BIT_RATE, 32000)
            setInteger(MediaFormat.KEY_CHANNEL_COUNT, 1)
            setInteger(MediaFormat.KEY_SAMPLE_RATE, RECORDER_SAMPLERATE)
            setInteger(MediaFormat.KEY_AAC_PROFILE, CodecProfileLevel.AACObjectLC)
            setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, bufferSize)
        }

        val encoderString = MediaCodecList(MediaCodecList.REGULAR_CODECS).findEncoderForFormat(mediaFormat)

        Timber.tag(TAG).d("chosen codec: $encoderString")
        val mediaCodec = existing ?: MediaCodec.createByCodecName(encoderString)

        try {
            mediaCodec.configure(mediaFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
        } catch (e: Exception) {
            Timber.tag(TAG).w(e)
            mediaCodec.release()
        }
        return mediaCodec
    }

    private fun createMuxer(format: MediaFormat, file: File) {
        try {
            file.createNewFile()
            mediaMuxer = MediaMuxer(file.absolutePath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
            trackId = mediaMuxer.addTrack(format)
        } catch (e: java.lang.Exception) {
            Timber.tag(TAG).e(e)
        }
    }

    private var currentIndex: Int = 0

    private fun createTempFile() {
        currentFile = File(context.cacheDir, "$currentIndex.m4a").also { it.createNewFile() }
        currentIndex++
    }
}

I run this code in a coroutine, for example:

class MyViewModel : ViewModel() {
    fun startRecording() {
        val recordingUtil = RecordingUtil2(...)
        viewModelScope.launch(Dispatchers.Default) {
            recordingUtil.record(true)
        }
    }
}

The problem I'm facing is that after a few chunks have been saved to consecutive files, MediaMuxer crashes with an exception from MPEG4Writer:

E/MPEG4Writer: do not support out of order frames (timestamp: 13220 < last: 23219 for Audio track

However, as you can see in the provided code, the timestamps are generated incrementally and passed in the correct order as arguments to MediaCodec.queueInputBuffer(...).

What is interesting (and possibly a hint at what is wrong) is that the exception message from MPEG4Writer says the last timestamp was 23219 every single time, as if it were a constant, whereas judging from the native platform code it should print the previous frame's timestamp, which is unlikely to be a constant so much larger than 0.

More logs from the crash (for context):

I/MPEG4Writer: Normal stop process
D/MPEG4Writer: Audio track stopping. Stop source
I/MPEG4Writer: Received total/0-length (22/1) buffers and encoded 22 frames. - Audio
D/MPEG4Writer: Audio track source stopping
D/MPEG4Writer: Audio track source stopped
I/MPEG4Writer: Audio track drift time: 0 us
D/MPEG4Writer: Audio track stopped. Stop source
D/MPEG4Writer: Stopping writer thread
D/MPEG4Writer: 0 chunks are written in the last batch
D/MPEG4Writer: Writer thread stopped
I/MPEG4Writer: Ajust the moov start time from 44099 us -> 44099 us
I/MPEG4Writer: The mp4 file will not be streamable.
D/MPEG4Writer: Audio track stopping. Stop source
D/RECORDING: encoding 3528 bytes, last = false, time: 79102, buffer time: 0
D/RECORDING: encoding 3528 bytes, last = false, time: 85883, buffer time: 0
D/RECORDING: encoding 3528 bytes, last = false, time: 89383, buffer time: 79102
I/MPEG4Writer: setStartTimestampUs: 79102 from Audio track
I/MPEG4Writer: Earliest track starting time: 79102
E/MPEG4Writer: do not support out of order frames (timestamp: 13220 < last: 23219 for Audio track
E/MPEG4Writer: 0 frames to dump timeStamps in Audio track 
I/MPEG4Writer: Received total/0-length (3/0) buffers and encoded 2 frames. - Audio
I/MPEG4Writer: Audio track drift time: 0 us
E/MediaAdapter: pushBuffer called before start
E/AndroidRuntime: FATAL EXCEPTION: DefaultDispatcher-worker-1
E/AndroidRuntime: FATAL EXCEPTION: DefaultDispatcher-worker-1
    Process: com.example, PID: 23499
    java.lang.IllegalStateException: writeSampleData returned an error

Logs from successfully recording and saving audio chunks:

I/MPEG4Writer: Normal stop process
D/MPEG4Writer: Audio track stopping. Stop source
D/MPEG4Writer: Audio track source stopping
I/MPEG4Writer: Received total/0-length (18/0) buffers and encoded 18 frames. - Audio
D/MPEG4Writer: Audio track source stopped
I/MPEG4Writer: Audio track drift time: 0 us
D/MPEG4Writer: Audio track stopped. Stop source
D/MPEG4Writer: Stopping writer thread
D/MPEG4Writer: 0 chunks are written in the last batch
D/MPEG4Writer: Writer thread stopped
I/MPEG4Writer: Ajust the moov start time from 45890 us -> 45890 us
I/MPEG4Writer: The mp4 file will not be streamable.
D/MPEG4Writer: Audio track stopping. Stop source
D/RECORDING: encoding 3528 bytes, last = false, time: 44099, buffer time: 0
D/RECORDING: encoding 3528 bytes, last = false, time: 74366, buffer time: 44099
I/MPEG4Writer: setStartTimestampUs: 44099 from Audio track
I/MPEG4Writer: Earliest track starting time: 44099
D/RECORDING: encoding 3528 bytes, last = false, time: 116122, buffer time: 80805
D/RECORDING: encoding 3528 bytes, last = false, time: 156789, buffer time: 104025
D/RECORDING: encoding 3528 bytes, last = false, time: 196940, buffer time: 152221
D/RECORDING: encoding 3528 bytes, last = false, time: 235010, buffer time: 176108
D/RECORDING: encoding 3528 bytes, last = false, time: 275232, buffer time: 243989
D/RECORDING: encoding 3528 bytes, last = false, time: 316400, buffer time: 267209
D/RECORDING: encoding 3528 bytes, last = false, time: 361290, buffer time: 313871
D/RECORDING: encoding 3528 bytes, last = false, time: 401305, buffer time: 338259
D/RECORDING: encoding 3528 bytes, last = false, time: 441019, buffer time: 412824
D/RECORDING: encoding 3528 bytes, last = false, time: 481193, buffer time: 436044
I/RECORDING: cutting chunk
D/RECORDING: encoding 3528 bytes, last = true, time: 518624, buffer time: 458978
I/MediaCodec: Codec shutdown complete

I noticed that the logs from the crash scenario show BufferInfo containing a timestamp of 0 for the two initial frames, while the non-crashing logs always have only one such frame. However, I have sometimes observed the same crash with only one timestamp-0 frame, so it may not be relevant.

Can someone help me figure out this problem?

Check out the question Muxing AAC audio with Android's MediaCodec and MediaMuxer.

Best guess: the encoder is doing something with the output (possibly splitting one input packet into two output packets) that requires it to synthesize a timestamp. It takes the timestamp from the start of the packet and adds a value based on the bit rate and byte count. If you generate timestamps with reasonably correct presentation times, you shouldn't see it go backwards when the "in-between" timestamp is generated. (by @fadden)

I'm quoting @fadden's comment above. It explains why MediaCodec produces the surprise, namely timestamps that do not increase the way you would expect.
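
To make "reasonably correct presentation times" concrete: one common pattern (a sketch of my own, not code from the question or the linked answer) is to derive the input timestamp from the amount of PCM data already queued instead of from System.nanoTime(). The extra field totalBytesQueued and the helper functions below are assumptions for illustration; RECORDER_SAMPLERATE is the question's own constant.

    // Sketch only: timestamps advance exactly with the audio that has been queued,
    // so a timestamp the codec synthesizes for a split packet should not land
    // behind the previous frame.
    private var totalBytesQueued = 0L

    private fun nextPresentationTimeUs(): Long {
        // PCM-16 mono: 2 bytes per sample.
        val samplesQueued = totalBytesQueued / 2
        return samplesQueued * 1_000_000L / RECORDER_SAMPLERATE
    }

    private fun queuePcm(encoder: MediaCodec, bytes: ByteArray, byteCount: Int, last: Boolean) {
        val inputIndex = encoder.dequeueInputBuffer(10_000)
        if (inputIndex >= 0) {
            encoder.getInputBuffer(inputIndex)?.apply {
                clear()
                put(bytes, 0, byteCount)
            }
            val flags = if (last) MediaCodec.BUFFER_FLAG_END_OF_STREAM else 0
            encoder.queueInputBuffer(inputIndex, 0, byteCount, nextPresentationTimeUs(), flags)
            totalBytesQueued += byteCount
        }
    }

That matches @fadden's point: if the queued timestamps track real audio time, a synthesized "in-between" timestamp should not go backwards.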

So, you say:

However, as you can see in the provided code, the timestamps are generated incrementally and passed in the correct order as arguments to MediaCodec.queueInputBuffer(...).

This has nothing to do with the code where you supply a single timestamp. Look closely at the error message:

java.lang.IllegalStateException: writeSampleData returned an error

The error is raised inside the writeSampleData method. So just put some logging of BufferInfo.presentationTimeUs right before muxer.writeSampleData and you will see the surprise.
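
For instance, inside the output loop of encodeRawAudio, a log line like the sketch below (reusing the question's own Timber tag) would show the timestamps the muxer actually receives; note that bufferInfo.presentationTimeUs is filled in by the encoder when the output buffer is dequeued, it is not copied verbatim from what was passed to queueInputBuffer.

                        // Sketch: log what the muxer actually gets, right before writing the sample.
                        Timber.tag(TAG).d(
                            "writeSampleData: time=${bufferInfo.presentationTimeUs} us, " +
                                "size=${bufferInfo.size}, flags=${bufferInfo.flags}"
                        )
                        muxer.writeSampleData(trackId, outputBuffer, bufferInfo)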
