![](/img/trans.png)
[英]No audio when encoding PCM to AAC using MediaCodec and MediaMuxer
[英]Write a PCM encoded sounds with silence gaps using MediaCodec and MediaMuxer
我正在嘗試在 Android 上制作一個簡單的「點擊跟蹤」文件渲染器。我有一個聲音的 PCM 編碼數據和一些有限間隙序列作為輸入(表示為 ClickTrack 類)。我想要一個可播放的 .m4a 文件作為輸出,該聲音在正確渲染的間隙上重復播放。
問題是我得到了一個處於半損壞狀態的文件——它在開始時盡可能快地播放所有重復的聲音,然后在曲目的整個過程中保持沉默。 曲目的持續時間恰好是正確的,因此演示時間似乎是正確的。
現在代碼:
/**
 * Encodes [clickTrack] to an AAC/MP4 (.m4a) file in the app cache directory.
 *
 * @param clickTrack the track to render; converted to PCM samples via `toSamples()`.
 * @param onProgress called with the fraction (0..1) of PCM input bytes consumed so far.
 * @param onFinished always called once, after codec and muxer are released.
 * @return the rendered temp file on success, or null if any step threw.
 *
 * NOTE(review): this is the question's broken version. Known issues, confirmed
 * by the accepted answer below:
 *  - `sample.timestampUs` is passed as presentationTimeUs in the hope that the
 *    encoder inserts silence between samples. It does not — gaps must be written
 *    as zero-filled PCM frames in the input itself.
 *  - Buffers flagged BUFFER_FLAG_CODEC_CONFIG are not skipped before
 *    `writeSampleData` — presumably the muxer rejects or mis-writes the CSD
 *    buffer; verify against the MediaCodec CSD documentation.
 */
fun render(clickTrack: ClickTrack, onProgress: (Float) -> Unit, onFinished: () -> Unit): File? {
var muxer: MediaMuxer? = null
var codec: MediaCodec? = null
try {
// Target format: 44.1 kHz stereo AAC at 96 kbit/s.
val audioFormat = MediaFormat.createAudioFormat(MediaFormat.MIMETYPE_AUDIO_AAC, 44100, 2)
.apply {
setInteger(MediaFormat.KEY_BIT_RATE, 96 * 1024)
}
val outputFile = File.createTempFile("click_track_export", ".m4a", context.cacheDir)
muxer = MediaMuxer(outputFile.path, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
// `!!`: findEncoderForFormat returns null if no encoder matches; AAC is assumed available.
val codecName = MediaCodecList(MediaCodecList.REGULAR_CODECS).findEncoderForFormat(audioFormat)!!
codec = MediaCodec.createByCodecName(codecName)
codec.configure(audioFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
codec.start()
// Converts click track to sequence of sound buffers (all the same) with
// timestamps (computed using gaps) for convenience. Gaps are not presented
// in buffers in order to conserve memory
val samples = clickTrack.toSamples()
// Total PCM payload size, used only for progress reporting.
val bytesToWrite = samples.sumOf { it.data.data.size.toLong() }
val bufferInfo = MediaCodec.BufferInfo()
var bytesWritten = 0L
var index = 0
// With no samples there is nothing to feed or drain.
var endOfInput = samples.isEmpty()
var endOfOutput = samples.isEmpty()
var sample = samples.getOrNull(index)
var sampleBuffer: ByteBuffer? = null
// Single-threaded feed/drain loop: push PCM into input buffers, pull AAC
// from output buffers, until both sides have seen end-of-stream.
while (!endOfInput || !endOfOutput) {
if (!endOfInput) {
// Advance to the next sample once the current one is fully consumed.
if (sampleBuffer == null || !sampleBuffer.hasRemaining()) {
sample = samples[index]
sampleBuffer = ByteBuffer.wrap(samples[index].data.data)
++index
}
// Both are guaranteed non-null here (assigned above on first iteration).
sample!!
sampleBuffer!!
// Non-blocking dequeue; on -1 we simply retry next loop iteration.
val inputBufferIndex = codec.dequeueInputBuffer(0L)
if (inputBufferIndex >= 0) {
val inputBuffer = codec.getInputBuffer(inputBufferIndex)!!
// Byte-by-byte copy of as much PCM as fits in this input buffer.
while (sampleBuffer.hasRemaining() && inputBuffer.hasRemaining()) {
inputBuffer.put(sampleBuffer.get())
++bytesWritten
}
onProgress(bytesWritten.toFloat() / bytesToWrite)
// EOS when the last sample has been drained and no samples remain.
endOfInput = !sampleBuffer.hasRemaining() && index == samples.size
// BUG (see answer below): presentationTimeUs does not create silence
// gaps; it is only an a/v-sync ordering hint for the encoder/muxer.
codec.queueInputBuffer(
inputBufferIndex,
0,
inputBuffer.position(),
sample.timestampUs,
if (endOfInput) MediaCodec.BUFFER_FLAG_END_OF_STREAM else 0
)
}
}
if (!endOfOutput) {
val outputBufferIndex = codec.dequeueOutputBuffer(bufferInfo, 0L)
if (outputBufferIndex >= 0) {
val outputBuffer = codec.getOutputBuffer(outputBufferIndex)!!
// NOTE(review): track index 0 assumes addTrack() already ran; in practice
// INFO_OUTPUT_FORMAT_CHANGED arrives before the first data buffer — verify.
// CODEC_CONFIG buffers are also not filtered out here.
muxer.writeSampleData(0, outputBuffer, bufferInfo)
codec.releaseOutputBuffer(outputBufferIndex, false)
} else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
// Not using `audioFormat` because of https://developer.android.com/reference/android/media/MediaCodec#CSD
muxer.addTrack(codec.outputFormat)
muxer.start()
}
// NOTE(review): bufferInfo is read even when no buffer was dequeued this
// pass (index < 0), so this inspects stale flags from a previous pass.
endOfOutput = bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0
}
}
return outputFile
} catch (t: Throwable) {
Timber.e(t, "Failed to render track")
} finally {
// Stop/release both resources independently; a failure in one must not
// prevent cleanup of the other.
try {
codec?.stop()
} catch (t: Throwable) {
// (typo in original message: "code" should read "codec")
Timber.e(t, "Failed to stop code")
} finally {
codec?.release()
}
try {
muxer?.stop()
} catch (t: Throwable) {
Timber.e(t, "Failed to stop muxer")
} finally {
muxer?.release()
}
// Invoked on both success and failure paths.
onFinished()
}
// Reached only when the try block threw before returning.
return null
}
// Classes descriptions
/**
 * A chunk of PCM audio paired with the presentation timestamp at which it
 * should start playing.
 *
 * Declared as a `data class`: it is a pure immutable value holder, so the
 * generated equals/hashCode/toString/copy are appropriate and the change is
 * backward compatible for all existing callers.
 *
 * @property data the raw PCM payload together with its format description.
 * @property timestampUs presentation time of the first frame, in microseconds.
 */
data class Sample(
    val data: PcmData,
    val timestampUs: Long,
)
/**
 * Raw PCM audio bytes plus the format metadata needed to interpret them.
 *
 * Deliberately NOT a data class: [data] is a ByteArray, and generated
 * structural equals/hashCode would compare arrays by reference anyway.
 *
 * @property pcmEncoding AudioFormat encoding constant (e.g. ENCODING_PCM_16BIT) — presumably; confirm against producer.
 * @property sampleRate samples per second (e.g. 44100).
 * @property channelCount number of interleaved channels.
 * @property data the raw interleaved PCM bytes.
 */
class PcmData(
    val pcmEncoding: Int,
    val sampleRate: Int,
    val channelCount: Int,
    val data: ByteArray,
)
原來我誤解了queueInputBuffer
方法中的presentationTimeUs
參數。 它不會像我想的那樣為你寫靜音幀。 如果您碰巧有 B 幀等,這只是編碼器/復用器用於 av 同步和排序的提示。
對於僅音頻文件,我當時把它全部設為 0L,並且效果很好。(更正:這實際上是錯誤的,並且不適用於 Android Marshmallow。無論哪種方式,您都應該計算正確的演示時間。)
另一個錯誤是寫入的靜音不是 PCM 幀大小的倍數(即樣本大小 * 通道數)。 如果你不這樣做,你最終會出現音頻故障。
所以最后我得到了這個代碼來生成完整的 ByteArray,以供 MediaCodec 使用:
/**
 * Flattens this click track into one contiguous PCM byte stream ready to be
 * fed to MediaCodec: each player event contributes its sound (truncated to the
 * event duration if needed) followed by zero-filled silence padding.
 *
 * Both sound and silence are kept aligned to whole PCM frames
 * (bytesPerFrame = bytesPerSample * channelCount); writing a partial frame
 * produces audible glitches.
 *
 * @return the full PCM stream for the whole track.
 */
private fun ClickTrack.render(): ByteArray {
    // ByteArrayOutputStream instead of MutableList<Byte>: the list would box
    // every single byte of audio (one wrapper object per byte), which is
    // prohibitively expensive for seconds of 44.1 kHz stereo PCM.
    val result = java.io.ByteArrayOutputStream()
    for (event in toPlayerEvents()) {
        // Object containing raw byte array and some meta information like sample rate and channel count
        val pcm = event.sound
        // Compute overall frame count that can fit in event.duration
        // framesPerSecond = sampleRate / channelCount
        val maxFramesCount = (event.duration.toDouble(DurationUnit.SECONDS) * pcm.framesPerSecond).toInt()
        // Compute frames for sound. If sound is longer than event duration, truncate it
        // bytesPerFrame = bytesPerSample (1 for ENCODING_PCM_8BIT, 2 for ENCODING_PCM_16BIT and so on) * channelCount
        val framesOfSound = (pcm.data.size / pcm.bytesPerFrame).coerceAtMost(maxFramesCount)
        // The rest is just silent frames
        val framesOfSilence = maxFramesCount - framesOfSound
        result.write(pcm.data, 0, framesOfSound * pcm.bytesPerFrame)
        // A fresh ByteArray is zero-initialized, i.e. PCM silence.
        result.write(ByteArray(framesOfSilence * pcm.bytesPerFrame))
    }
    return result.toByteArray()
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.