如何使用 opus 編碼和解碼音頻數據？

Question

我正在處理語音聊天，我需要壓縮我的音頻數據。 我通過 Qt 框架記錄和播放音頻數據。 如果我錄制和播放音頻數據而不壓縮它，一切都很好。 如果我壓縮、解壓縮和播放音頻數據，我只會聽到破裂的聲音。

編輯：我查看了演示代碼並嘗試使用該代碼。 現在我可以聽到一些聲音，但延遲非常嚴重。 如果我把 pcm_bytes 的大小增加到例如 40000，聽起來會好一些，但聲音仍然有延遲，而且還是有破音。

這是相關的那一行（位於 audioinput.cpp 的底部）：

// Writes a fixed 3840 bytes from pcm_bytes to the speaker device.
speaker->write((const char*)pcm_bytes,3840);

codecopus.cpp：

#include "codecopus.h"

/// Constructs a codec wrapper that has neither an encoder nor a decoder yet.
///
/// Both state pointers are set to null so they hold a well-defined value
/// until initEncoder() / initDecoder() are called; the destructor passes
/// these pointers to the Opus destroy functions unconditionally.
CodecOpus::CodecOpus()
{
    decoderState = nullptr;
    encoderState = nullptr;
}

/// Creates the Opus decoder used by decodeData().
///
/// @param samplingRate  Sample rate passed to opus_decoder_create().
/// @param channels      Channel count passed to opus_decoder_create().
///
/// On failure the error is reported on stderr and decoderState is left null
/// instead of silently keeping whatever the failed call returned.
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
    int error = OPUS_OK;
    decoderState = opus_decoder_create(samplingRate, channels, &error);
    if (error != OPUS_OK || decoderState == nullptr) {
        std::cerr << "Failed to create Opus Decoder: " << opus_strerror(error) << std::endl;
        decoderState = nullptr;
        return;
    }

    std::cout << "Created Opus Decoder struct" << std::endl;
}

/// Creates the Opus encoder (VoIP application mode) used by encodeData()
/// and requests a 64 kbit/s bitrate.
///
/// @param samplingRate  Sample rate passed to opus_encoder_create().
/// @param channels      Channel count passed to opus_encoder_create().
///
/// The creation error is checked before the encoder is configured, so a
/// failed creation is reported and never followed by a ctl call on a null
/// encoder. A bitrate failure is reported separately.
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
    int error = OPUS_OK;
    encoderState = opus_encoder_create(samplingRate, channels, OPUS_APPLICATION_VOIP, &error);
    if (error != OPUS_OK || encoderState == nullptr) {
        std::cerr << "Failed to create Opus Encoder: " << opus_strerror(error) << std::endl;
        encoderState = nullptr;
        return;
    }
    std::cout << "Created Opus Encoder struct" << std::endl;

    error = opus_encoder_ctl(encoderState, OPUS_SET_BITRATE(64000));
    if (error != OPUS_OK) {
        std::cerr << "Failed to set Opus Encoder bitrate: " << opus_strerror(error) << std::endl;
    }
}

/// Encodes one frame of 16-bit PCM by forwarding to opus_encode().
///
/// @param pcm           Input samples.
/// @param frameSize     Forwarded as opus_encode()'s frame_size argument.
/// @param data          Destination buffer for the encoded packet.
/// @param maxDataBytes  Capacity of @p data in bytes.
/// @return Whatever opus_encode() returns, unchanged.
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
    return opus_encode(encoderState, pcm, frameSize, data, maxDataBytes);
}

/// Decodes one Opus packet into 16-bit PCM by forwarding to opus_decode().
///
/// @param data             Encoded packet.
/// @param numberOfBytes    Length of @p data in bytes.
/// @param pcm              Destination buffer for the decoded samples.
/// @param frameSizeInSec   Forwarded as opus_decode()'s frame_size argument.
/// @return Whatever opus_decode() returns, unchanged.
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes,opus_int16* pcm,int frameSizeInSec) //Decoder
{
    // The trailing 0 is opus_decode()'s final (decode_fec) argument.
    const int decodeResult = opus_decode(decoderState, data, numberOfBytes, pcm, frameSizeInSec, 0);
    return decodeResult;
}

/// Releases the decoder and then the encoder state.
CodecOpus::~CodecOpus()
{
    opus_decoder_destroy(decoderState);
    opus_encoder_destroy(encoderState);
}

audioinput.h：

#ifndef AUDIOINPUT_H
#define AUDIOINPUT_H
#include <QAudioFormat>
#include <iostream>
#include <QAudioInput>
#include <QAudioOutput>
#include <thread>
#include "codecopus.h"
#include "QDebug"
/// Captures audio from the default input device, passes it through the
/// Opus encoder and decoder, and plays the result on the default output
/// device.
class AudioInput : public QObject
{
    Q_OBJECT

public:
    AudioInput();
    ~AudioInput();

    /// Starts the input and output devices.
    void startRecording();

    /// Runs startRecording() on a separate std::thread.
    void CreateNewAudioThread();

private:

    CodecOpus opus;

    // Buffer that receives the encoded Opus packet.
    unsigned char cbits[4000] = {};
    // One input frame: 960 samples per channel x 2 channels. The element
    // type is already opus_int16, so the count must not be multiplied by
    // sizeof(opus_int16) again.
    opus_int16 in[960*2] = {};
    // Decoder output: 5760 samples per channel x 2 channels.
    opus_int16 out[5760*2] = {};

    unsigned char *pcm_bytes = nullptr;

    int MAX_FRAME_SIZE = 0;

    QAudioFormat audioFormat;
    QAudioInput *audioInput = nullptr;
    QIODevice *mic = nullptr;      // device returned by audioInput->start()
    QByteArray data;
    int micFrameSize = 0;


    QAudioOutput *audioOutput = nullptr;
    QIODevice *speaker = nullptr;  // device returned by audioOutput->start()
    QAudioFormat speakerAudioFormat;

public slots:
    /// Connected to audioInput's notify() signal.
    void OnAudioNotfiy();
};

#endif // AUDIOINPUT_H

audioinput.cpp：

#include "audioinput.h"

/// Configures 48 kHz / stereo / 16-bit signed little-endian PCM for both the
/// microphone and the speaker, falls back to each device's nearest format
/// when that is unsupported, creates the Qt audio objects and the Opus
/// encoder/decoder, and hooks the input's notify() signal to OnAudioNotfiy().
AudioInput::AudioInput()
    // Listed in member declaration order; pointers start out null so they
    // hold a defined value before startRecording() assigns them.
    : pcm_bytes(new unsigned char[40000]),
      MAX_FRAME_SIZE(6*960),
      audioInput(nullptr),
      mic(nullptr),
      micFrameSize(0),
      audioOutput(nullptr),
      speaker(nullptr)
{
    audioFormat.setSampleRate(48000);
    audioFormat.setChannelCount(2);
    audioFormat.setSampleSize(16);
    audioFormat.setSampleType(QAudioFormat::SignedInt);
    audioFormat.setByteOrder(QAudioFormat::LittleEndian);
    audioFormat.setCodec("audio/pcm");

    speakerAudioFormat.setSampleRate(48000);
    speakerAudioFormat.setChannelCount(2);
    speakerAudioFormat.setSampleSize(16);
    speakerAudioFormat.setSampleType(QAudioFormat::SignedInt);
    speakerAudioFormat.setByteOrder(QAudioFormat::LittleEndian);
    speakerAudioFormat.setCodec("audio/pcm");

    QAudioDeviceInfo info = QAudioDeviceInfo::defaultInputDevice();
    if(!info.isFormatSupported(audioFormat)){
        std::cout << "Mic Format not supported!" << std::endl;
        audioFormat = info.nearestFormat(audioFormat);
    }

    QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
    if(!speakerInfo.isFormatSupported(speakerAudioFormat)){
        std::cout << "Speaker Format is not supported!" << std::endl;
        // Ask the OUTPUT device for its nearest format; the input device's
        // answer says nothing about what the speaker can play.
        speakerAudioFormat = speakerInfo.nearestFormat(speakerAudioFormat);
    }

    std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;

    audioInput = new QAudioInput(audioFormat);
    audioOutput = new QAudioOutput(speakerAudioFormat);
    audioInput->setNotifyInterval(20);
    // Samples per channel in one 20 ms notify interval.
    micFrameSize = (audioFormat.sampleRate()/1000)*20;

    opus.initEncoder(audioFormat.sampleRate(),audioFormat.channelCount());
    opus.initDecoder(speakerAudioFormat.sampleRate(),speakerAudioFormat.channelCount());

    connect(audioInput,SIGNAL(notify()),this,SLOT(OnAudioNotfiy()));
}

/// Releases the Qt audio objects created by the constructor.
AudioInput::~AudioInput()
{
    // Both objects are created with `new` and no QObject parent, so nothing
    // else deletes them.
    delete audioInput;
    audioInput = nullptr;
    delete audioOutput;
    audioOutput = nullptr;

    // NOTE(review): pcm_bytes is deliberately not freed here. It may have been
    // repointed at storage owned by `data`; confirm ownership before adding
    // a delete[].
}

void AudioInput::startRecording()
{

    mic = audioInput->start();
    speaker = audioOutput->start();
    std::cout << "Recording started!" << std::endl;


}


void AudioInput::CreateNewAudioThread()
{
    std::thread t1(&AudioInput::startRecording,this);
    t1.detach();
}





void AudioInput::OnAudioNotfiy()
{
    data = mic->readAll();


    std::cout << "data size" <<data.size() << std::endl;
    if(data.size() > 0){
    pcm_bytes = reinterpret_cast<unsigned char*>(data.data());

//convert

    for(int i=0;i<2*960;i++){ //TODO HARDCODED
        in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
    }
    opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);

    opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);

    for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
    {
        pcm_bytes[2*i]=out[i]&0xFF;
        pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
    }


    speaker->write((const char*)pcm_bytes,3840);
}

}

Answer 1

1）您只編碼了 960 個字節，而緩沖區要大得多。 您必須將緩沖區分成若干大小相等的部分，並逐一傳遞給編碼器。 每一部分的大小（每聲道的樣本數）必須是 120、240、480、960、1920 或 2880。

2) 在 char 數組與 opus_int16 數組之間相互轉換時，請使用 qFromLittleEndian()/qToLittleEndian() 函數或類型轉換。 這樣可以避免破音和音質不佳。

例子：

void voice::slot_read_audio_input()
{

    //    Audio settings:
    //    Sample Rate=48000
    //    Sample Size=16
    //    Channel Count=1
    //    Byte Order=Little Endian
    //    Sample Type= UnSignedInt

    //    Encoder settings:
    //    Sample Rate=48000
    //    Channel Count=1
    //    OPUS_APPLICATION_VOIP

    //    Decoder settings:
    //    Sample Rate=48000
    //    Channel Count=1

    QByteArray audio_buffer;//mic
    QByteArray output_audio_buffer;//speaker

    int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
    int const FRAME_SIZE=960;
    int const MAX_FRAME_SIZE=1276;
    int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;

    opus_int16 input_frame[FRAME_SIZE] = {};
    opus_int16 output_frame[FRAME_SIZE] = {};
    unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
    unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};

    audio_buffer.resize(voice_input->bytesReady());   
    output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);

    input->read(audio_buffer.data(),audio_buffer.size());

    for(int i=0;i<FRAME_COUNT;i++)
    {
        //    convert from LittleEndian
        for(int j=0;j<FRAME_SIZE;j++)
        {
            input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
            //    or use this:
            //    input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
        }

        opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
        opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);

        //    conver to LittleEndian
        for(int j = 0; j<decompressedBytes;j++)
        {
            qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
            //    or use this:
            //    decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
            //    decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
        }

        audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
        output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
        //    or use this:
        //    output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
    }
}

Answer 2

我原本准備了一段很長的回答，說明您把 opus.decodeData 的返回值誤解成了字節數，而正確的解釋是「每個通道解碼出的樣本數」。 但看起來您在后面的字節轉換例程中已經考慮到了這一點， 所以我不確定錯誤出在哪里。

一般來說，我認為您把 unsigned char <-> int16 的轉換做得比實際需要的更復雜。 您應該可以把音頻緩沖區直接傳給 opus（或直接從 opus 接收），只需就地把指針 reinterpret 成所需的類型，而不必手動進行位操作，在不同緩沖區之間轉換和復制。 音頻設備應該會給您小端數據，但如果字節序不匹配，您可以執行一個簡單的字節交換例程：

for (int c = 0; c < numSamples; c++)
{
    unsigned char tmp = data[2 * c];
    data[2 * c] = data[2 * c + 1];
    data[2 * c + 1] = tmp;
}

我在這里沒有看到它，但我假設您也有代碼一次只從麥克風中消耗 960 個樣本，並將其余部分保留在下一幀的緩沖區中，否則您將丟失數據。

這並不重要，但您也可以將 cbits 中的 4000 替換為 1275，這是最大的 opus 數據包大小。

如何使用 opus 編碼和解碼音頻數據？

問題描述

2 個解決方案

解決方案1
1 2019-11-01 17:11:45

解決方案2
0 2018-08-04 17:16:42

如何使用 opus 編碼和解碼音頻數據？

問題描述

2 個解決方案

解決方案1 1 2019-11-01 17:11:45

解決方案2 0 2018-08-04 17:16:42

解決方案1
1 2019-11-01 17:11:45

解決方案2
0 2018-08-04 17:16:42