![](/img/trans.png)
[英]No audio when encoding PCM to AAC using MediaCodec and MediaMuxer
[英]Write a PCM encoded sounds with silence gaps using MediaCodec and MediaMuxer
我正在尝试在 Android 上制作一个简单的“点击跟踪”文件渲染器。 我有一个声音的 PCM 编码数据和一些有限间隙序列作为输入(表示为ClickTrack
类)。 我想要一个可播放的.m4a
文件作为输出,该声音在正确渲染的间隙上重复。
问题是我得到了一个处于半损坏状态的文件——它在开始时尽可能快地播放所有重复的声音,然后在曲目的整个过程中保持沉默。 曲目的持续时间恰好是正确的,因此演示时间似乎是正确的。
现在代码:
// Renders a click track into a playable .m4a file by encoding raw PCM to AAC
// with MediaCodec and packaging the encoded frames with MediaMuxer.
// Reports progress via [onProgress] (fraction of input bytes consumed),
// always invokes [onFinished], and returns the output file or null on failure.
//
// NOTE(review): several known pitfalls in this loop (this is the buggy code
// the question is about):
//  - `muxer.writeSampleData(0, ...)` hard-codes track index 0 instead of using
//    the index returned by `muxer.addTrack(...)` — TODO confirm against the
//    MediaMuxer API contract.
//  - The first dequeued output buffer is written before the
//    INFO_OUTPUT_FORMAT_CHANGED branch has called `muxer.start()` if the codec
//    ever emits data first; buffers flagged BUFFER_FLAG_CODEC_CONFIG are also
//    not skipped before muxing — both are documented requirements of the
//    MediaCodec/MediaMuxer workflow.
//  - `dequeueInputBuffer(0L)` / `dequeueOutputBuffer(..., 0L)` with a zero
//    timeout makes this loop spin (busy-wait) when the codec has no buffers.
//  - `sample.timestampUs` is passed per input buffer as if the encoder would
//    insert silence between timestamps; it does not (this is the root cause
//    identified in the answer below) — presentation time is only a sync hint.
fun render(clickTrack: ClickTrack, onProgress: (Float) -> Unit, onFinished: () -> Unit): File? {
var muxer: MediaMuxer? = null
var codec: MediaCodec? = null
try {
// AAC, 44.1 kHz stereo at 96 kbit/s.
val audioFormat = MediaFormat.createAudioFormat(MediaFormat.MIMETYPE_AUDIO_AAC, 44100, 2)
.apply {
setInteger(MediaFormat.KEY_BIT_RATE, 96 * 1024)
}
// Output goes to the app cache dir; MPEG-4 container for the .m4a result.
val outputFile = File.createTempFile("click_track_export", ".m4a", context.cacheDir)
muxer = MediaMuxer(outputFile.path, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
// Pick whichever encoder the platform advertises for this format.
// NOTE(review): `!!` assumes an AAC encoder always exists on the device.
val codecName = MediaCodecList(MediaCodecList.REGULAR_CODECS).findEncoderForFormat(audioFormat)!!
codec = MediaCodec.createByCodecName(codecName)
codec.configure(audioFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
codec.start()
// Converts click track to sequence of sound buffers (all the same) with
// timestamps (computed using gaps) for convenience. Gaps are not presented
// in buffers in order to conserve memory
val samples = clickTrack.toSamples()
// Total input size, used only for progress reporting.
val bytesToWrite = samples.sumOf { it.data.data.size.toLong() }
val bufferInfo = MediaCodec.BufferInfo()
var bytesWritten = 0L
var index = 0
// Degenerate case: nothing to encode, skip the loop entirely.
var endOfInput = samples.isEmpty()
var endOfOutput = samples.isEmpty()
var sample = samples.getOrNull(index)
var sampleBuffer: ByteBuffer? = null
// Classic synchronous MediaCodec loop: feed input and drain output
// until both sides have signalled end-of-stream.
while (!endOfInput || !endOfOutput) {
if (!endOfInput) {
// Advance to the next sample once the current one is fully consumed.
if (sampleBuffer == null || !sampleBuffer.hasRemaining()) {
sample = samples[index]
sampleBuffer = ByteBuffer.wrap(samples[index].data.data)
++index
}
// Smart-cast helpers: both are guaranteed non-null here.
sample!!
sampleBuffer!!
// Zero timeout: returns immediately even if no buffer is free
// (see busy-wait note in the header).
val inputBufferIndex = codec.dequeueInputBuffer(0L)
if (inputBufferIndex >= 0) {
val inputBuffer = codec.getInputBuffer(inputBufferIndex)!!
// Copy byte-by-byte until either the source sample or the
// codec input buffer is exhausted.
while (sampleBuffer.hasRemaining() && inputBuffer.hasRemaining()) {
inputBuffer.put(sampleBuffer.get())
++bytesWritten
}
onProgress(bytesWritten.toFloat() / bytesToWrite)
// EOS once the last sample has been fully drained.
endOfInput = !sampleBuffer.hasRemaining() && index == samples.size
codec.queueInputBuffer(
inputBufferIndex,
0,
inputBuffer.position(),
// NOTE(review): per-buffer timestamp misuse — see header.
sample.timestampUs,
if (endOfInput) MediaCodec.BUFFER_FLAG_END_OF_STREAM else 0
)
}
}
if (!endOfOutput) {
val outputBufferIndex = codec.dequeueOutputBuffer(bufferInfo, 0L)
if (outputBufferIndex >= 0) {
val outputBuffer = codec.getOutputBuffer(outputBufferIndex)!!
// NOTE(review): hard-coded track index + possible write before
// muxer.start(); codec-config buffers not skipped — see header.
muxer.writeSampleData(0, outputBuffer, bufferInfo)
codec.releaseOutputBuffer(outputBufferIndex, false)
} else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
// Not using `audioFormat` because of https://developer.android.com/reference/android/media/MediaCodec#CSD
muxer.addTrack(codec.outputFormat)
muxer.start()
}
endOfOutput = bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0
}
}
return outputFile
} catch (t: Throwable) {
Timber.e(t, "Failed to render track")
} finally {
// Best-effort teardown: stop/release each component independently so a
// failure in one never leaks the other.
try {
codec?.stop()
} catch (t: Throwable) {
// NOTE(review): message typo — "code" should read "codec".
Timber.e(t, "Failed to stop code")
} finally {
codec?.release()
}
try {
muxer?.stop()
} catch (t: Throwable) {
Timber.e(t, "Failed to stop muxer")
} finally {
muxer?.release()
}
onFinished()
}
return null
}
// Classes descriptions
// A single unit of playable audio for the encoder: the raw PCM payload plus
// the presentation timestamp (microseconds) at which it should occur in the
// rendered track. Gaps between samples are represented only by timestamp
// distance, not by silence bytes (see the comment in `render`).
class Sample(
val data: PcmData,
val timestampUs: Long,
)
// Raw PCM audio together with the metadata needed to interpret it:
// `pcmEncoding` is an AudioFormat.ENCODING_PCM_* constant (determines bytes
// per sample), `sampleRate` is in Hz, and `data` holds interleaved frames of
// `channelCount` samples each.
// NOTE(review): kept as a plain class on purpose — a data class with a
// ByteArray property would get broken structural equals/hashCode.
class PcmData(
val pcmEncoding: Int,
val sampleRate: Int,
val channelCount: Int,
val data: ByteArray,
)
原来我误解了queueInputBuffer
方法中的presentationTimeUs
参数。 它不会像我想的那样为你写静音帧。 如果您碰巧有 B 帧等,这只是编码器/复用器用于 av 同步和排序的提示。
对于仅音频文件,我最初把演示时间全部设为 0L,当时看起来效果很好——但这实际上是错误的,在 Android Marshmallow 上无法正常工作。无论哪种方式,您都应该为每个缓冲区计算正确的演示时间。
另一个错误是写入的静音不是 PCM 帧大小的倍数(即样本大小 * 通道数)。 如果你不这样做,你最终会出现音频故障。
所以最后我得到了这个代码来生成完整的ByteArray
以供MediaCodec
使用:
// Renders the whole click track into one contiguous PCM byte array ready to
// be fed to MediaCodec: for every player event it emits the event's sound
// (truncated to the event duration) followed by explicit PCM silence, both
// aligned to whole frames (bytesPerFrame = bytesPerSample * channelCount) so
// no audio glitches occur at the seams.
//
// Improvement over the original: the previous version accumulated into a
// MutableList<Byte>, boxing every single byte, and allocated extra List
// copies via slice(); this version works on raw ByteArray chunks with
// copyInto, producing byte-identical output with far less allocation.
private fun ClickTrack.render(): ByteArray {
    // One fully-sized chunk per event; joined once at the end.
    val chunks = ArrayList<ByteArray>()
    var totalBytes = 0
    for (event in toPlayerEvents()) {
        // Object containing raw byte array and some meta information like
        // sample rate and channel count.
        val pcm = event.sound
        // Overall frame count that fits in event.duration
        // (framesPerSecond = sampleRate / channelCount).
        val maxFramesCount = (event.duration.toDouble(DurationUnit.SECONDS) * pcm.framesPerSecond).toInt()
        // Frames taken from the sound itself; truncate if the sound is longer
        // than the event.
        val framesOfSound = (pcm.data.size / pcm.bytesPerFrame).coerceAtMost(maxFramesCount)
        // ByteArray is zero-initialized, so everything past the copied sound
        // is already valid PCM silence — no separate silence buffer needed.
        val chunk = ByteArray(maxFramesCount * pcm.bytesPerFrame)
        pcm.data.copyInto(chunk, destinationOffset = 0, startIndex = 0, endIndex = framesOfSound * pcm.bytesPerFrame)
        chunks += chunk
        totalBytes += chunk.size
    }
    // Single final allocation, then sequential copy of all chunks.
    val result = ByteArray(totalBytes)
    var offset = 0
    for (chunk in chunks) {
        chunk.copyInto(result, offset)
        offset += chunk.size
    }
    return result
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.