How to record continuous raw audio data into a circular buffer with C++ on Windows 10?

Since Windows Multimedia turned out to be utterly incapable of recording continuous audio, I got the hint to use Windows Core Audio. There is sort of a manual here, but I can't figure out how to write the loads of overhead code to get the recording working. Can anyone provide a complete, minimal implementation of continuous audio recording to a circular buffer?

So far I am stuck at the code below, which never gets past the line pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice); because pEnumerator remains nullptr.

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

int main() {
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    pAudioClient->GetMixFormat(&pwfx);
    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.

    // Each loop fills about half of the shared buffer.
    while(true) {
        // Sleep for half the buffer duration.
        Sleep(hnsActualDuration/REFTIMES_PER_MILLISEC/2);
        pCaptureClient->GetNextPacketSize(&packetLength);
        while(packetLength != 0) {
            // Get the available data in the shared buffer.
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }

            // Copy the available capture data to the audio sink.
            //hr = pMySink->CopyData(pData, numFramesAvailable, &bDone);

            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        }
    }
    pAudioClient->Stop();
    return 0;
}

EDIT (24.07.2021):

Here is an update of my code for troubleshooting:

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#include <chrono>
#include <thread>   // for the sleep() helper below
#include <iostream> // for the println() helper below

typedef unsigned int uint;

// Minimal stand-ins for utility helpers used below (assumed to live elsewhere in the project),
// added here only so the snippet is self-contained:
template<typename T> void println(const T& x) { std::cout << x << std::endl; }
void sleep(double seconds) { if(seconds > 0.0) std::this_thread::sleep_for(std::chrono::duration<double>(seconds)); }
bool running = true; // never cleared in this snippet; the loop runs until the process is killed

class Clock {
private:
    typedef std::chrono::high_resolution_clock clock;
    std::chrono::time_point<clock> t;
public:
    Clock() { start(); }
    void start() { t = clock::now(); }
    double stop() const { return std::chrono::duration_cast<std::chrono::duration<double>>(clock::now()-t).count(); }
};

const uint base        =   4096;
const uint sample_rate =  48000; // must be supported by microphone
const uint sample_size = 1*base; // must be a power of 2
const uint bandwidth   =   5000; // must be <= sample_rate/2

float* wave = new float[sample_size]; // circular buffer

void fill(float* const wave, const float* const buffer, int offset) {
    for(int i=sample_size-1; i>=offset; i--) { // start at sample_size-1 to stay within the buffer
        wave[i] = wave[i-offset];
    }
    for(int i=0; i<offset; i++) {
        const uint p = offset-1-i;
        wave[i] = 0.5f*(buffer[2*p]+buffer[2*p+1]); // left and right channels
    }
}

int main() {
    for(uint i=0; i<sample_size; i++) wave[i] = 0.0f;
    
    Clock clock;

    #define REFTIMES_PER_SEC  10000000
    #define REFTIMES_PER_MILLISEC  10000

    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    CoInitializeEx(NULL, COINIT_MULTITHREADED);
    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    pAudioClient->GetMixFormat(&pwfx);
    
    println(pwfx->wFormatTag);// 65534
    println(WAVE_FORMAT_PCM);// 1
    println(pwfx->nChannels);// 2
    println((uint)pwfx->nSamplesPerSec);// 48000
    println(pwfx->wBitsPerSample);// 32
    println(pwfx->nBlockAlign);// 8
    println(pwfx->wBitsPerSample*pwfx->nChannels/8);// 8
    println((uint)pwfx->nAvgBytesPerSec);// 384000
    println((uint)(pwfx->nBlockAlign*pwfx->nSamplesPerSec*pwfx->nChannels));// 768000
    println(pwfx->cbSize);// 22

    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    //REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.
    
    while(running) {

        pCaptureClient->GetNextPacketSize(&packetLength); // packetLength and numFramesAvailable are either 0 or 480
        pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);

        const int offset = (uint)numFramesAvailable;
        if(offset>0) {
            fill(wave, (float*)pData, offset); // here I add pData to the circular buffer "wave"
        }

        while(packetLength != 0) {
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL); // Get the available data in the shared buffer.
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }
            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        }

        sleep(1.0/120.0-clock.stop());
        clock.start();
    }
    pAudioClient->Stop();
}

You're not calling CoInitializeEx, so all COM calls will fail.

You should also be testing all calls to see if they return an error.
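
For example, here is a minimal sketch (my own illustration, not part of this answer) of how the start of the question's program might look with COM initialized first and every HRESULT checked; the CHECK macro is just a hypothetical convenience, not a Windows API:

#include <Windows.h>
#include <Mmdeviceapi.h>
#include <cstdio>

// Illustrative helper: print which call failed and bail out.
#define CHECK(expr) do { HRESULT _hr = (expr); \
    if(FAILED(_hr)) { fprintf(stderr, "%s failed: 0x%08lx\n", #expr, (unsigned long)_hr); return 1; } } while(0)

int main() {
    CHECK(CoInitializeEx(NULL, COINIT_MULTITHREADED)); // must come before any other COM call

    IMMDeviceEnumerator* pEnumerator = NULL;
    CHECK(CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
                           __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator));

    IMMDevice* pDevice = NULL;
    CHECK(pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice));

    // ... continue with Activate, GetMixFormat, Initialize etc. as in the question ...

    pDevice->Release();
    pEnumerator->Release();
    CoUninitialize();
    return 0;
}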


To address the questions posed in the comments:

I believe that if you want to operate the endpoint in shared mode then you have to use the parameters returned by GetMixFormat. This means that:

  • you are limited to the one sample rate (unless you write code to perform a conversion, which is a non-trivial task)

  • if you want the samples as floats, you will have to convert them yourself

To write code that runs on all machines, you must cater for whatever the mix format throws at you. This might be:

  • 16 bit integers

  • 24 bit integers (nBlockAlign = 3)

  • 24 bit integers in 32 bit containers (nBlockAlign = 4)

  • 32 bit integers

  • 32 bit floating point (rare)

  • 64 bit floating point (unheard of, in my experience)

The samples will be in the native byte order of the machine your code is running on, and are interleaved.

So, case out on the various parameters in pwfx and write the relevant code for each sample format you want to support.
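
Here is a rough sketch of that dispatch (my addition, not the answer author's code; exact header and SDK requirements for the KSDATAFORMAT_SUBTYPE_* GUIDs may vary, so treat it as an outline):

#include <Windows.h>
#include <mmreg.h>
#include <ks.h>
#include <ksmedia.h> // KSDATAFORMAT_SUBTYPE_PCM / KSDATAFORMAT_SUBTYPE_IEEE_FLOAT

enum class SampleKind { Int16, Int24, Int32, Float32, Unsupported };

// Inspect the WAVEFORMATEX returned by GetMixFormat and decide which
// sample-conversion routine to run on the captured packets.
SampleKind classify(const WAVEFORMATEX* pwfx) {
    WORD bits = pwfx->wBitsPerSample;
    bool isFloat = (pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT);
    if(pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
        const WAVEFORMATEXTENSIBLE* ext = reinterpret_cast<const WAVEFORMATEXTENSIBLE*>(pwfx);
        bits = ext->Samples.wValidBitsPerSample;
        if(IsEqualGUID(ext->SubFormat, KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)) isFloat = true;
        else if(!IsEqualGUID(ext->SubFormat, KSDATAFORMAT_SUBTYPE_PCM)) return SampleKind::Unsupported;
    } else if(pwfx->wFormatTag != WAVE_FORMAT_PCM && !isFloat) {
        return SampleKind::Unsupported;
    }
    if(isFloat)    return bits == 32 ? SampleKind::Float32 : SampleKind::Unsupported;
    if(bits == 16) return SampleKind::Int16;
    if(bits == 24) return SampleKind::Int24; // container size is nBlockAlign / nChannels bytes
    if(bits == 32) return SampleKind::Int32;
    return SampleKind::Unsupported;
}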

Assuming you want your floats to be normalised to -1..+1, and 2-channel input data, you might do this for 16 bit integers, for example:

const int16_t *inbuf = (const int16_t *) pData;
float *outbuf = ...;

for (int i = 0; i < numFramesAvailable * 2; ++i)
{
    int16_t sample = *inbuf++;
    *outbuf++ = (float) (sample * (1.0 / 32767));
}

Note that I avoid a (slow) floating point division by multiplying by the reciprocal (the compiler will pre-calculate 1.0 / 32767).
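
As one more sketch (my addition, assuming the 32-bit WAVE_FORMAT_EXTENSIBLE mix format shown in the question's edit really is IEEE float, which should be confirmed via the SubFormat check above), the float case needs no rescaling at all, only deinterleaving or downmixing:

// Downmix interleaved 32-bit float stereo frames to mono; samples are already in -1..+1.
void copyFloat32Stereo(const BYTE* pData, UINT32 numFramesAvailable, float* outbuf)
{
    const float* inbuf = (const float*) pData;
    for(UINT32 i = 0; i < numFramesAvailable; ++i)
    {
        outbuf[i] = 0.5f * (inbuf[2*i] + inbuf[2*i+1]);
    }
}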

I'll leave the rest to you.

You could use this audio library instead. It's way easier to get up and running than trying to interface with the platform-specific SDKs:

http://www.music.mcgill.ca/~gary/rtaudio/recording.html

Also, while removing the sleep might not help in your example, you should never call sleep, lock a mutex, or allocate memory during audio processing. The delay introduced by those is completely arbitrary compared to the short buffer times, so it will always create problems for you.
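
If the sleep (and any mutex or allocation) has to go, one common pattern (a sketch under my own assumptions, not part of either answer) is to have the capture loop do nothing but push frames into a lock-free single-producer/single-consumer ring buffer, and let a second thread do everything else:

#include <atomic>
#include <cstddef>
#include <vector>

// Minimal SPSC ring buffer: the capture thread calls write(), a worker thread calls read().
// Neither side blocks, locks, or allocates; capacity must be a power of two.
class SpscRing {
public:
    explicit SpscRing(size_t capacityPow2) : buf_(capacityPow2), mask_(capacityPow2 - 1) {}

    size_t write(const float* src, size_t n) {             // producer (capture) side only
        const size_t head = head_.load(std::memory_order_relaxed);
        const size_t tail = tail_.load(std::memory_order_acquire);
        const size_t freeSpace = buf_.size() - (head - tail);
        const size_t todo = n < freeSpace ? n : freeSpace;  // drop what doesn't fit
        for(size_t i = 0; i < todo; ++i) buf_[(head + i) & mask_] = src[i];
        head_.store(head + todo, std::memory_order_release);
        return todo;
    }

    size_t read(float* dst, size_t n) {                     // consumer (worker) side only
        const size_t tail = tail_.load(std::memory_order_relaxed);
        const size_t head = head_.load(std::memory_order_acquire);
        const size_t avail = head - tail;
        const size_t todo = n < avail ? n : avail;
        for(size_t i = 0; i < todo; ++i) dst[i] = buf_[(tail + i) & mask_];
        tail_.store(tail + todo, std::memory_order_release);
        return todo;
    }

private:
    std::vector<float> buf_;
    const size_t mask_;
    std::atomic<size_t> head_{0}, tail_{0};
};

Sized to a second or so of audio (for example 65536 samples at 48 kHz), this gives the consumer plenty of slack without the capture loop ever waiting.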
