简体   繁体   English

填充 WASAPI 提供的音频端点缓冲区不播放声音

[英]Filling audio endpoint buffer provided by WASAPI not playing sound

I am trying to play noise through the default audio endpoint renderer using the WASAPI interface.我正在尝试使用 WASAPI 接口通过默认音频端点渲染器播放噪音。 I am trying to use the code provided by Microsoft on this page: https://docs.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream .我正在尝试使用 Microsoft 在此页面上提供的代码: https://docs.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream I want to write a class that can generate noise for this code sample.我想编写一个 class 可以为此代码示例产生噪声。

I have tried writing signed and unsigned integer values to the buffer of the default audio endpoint renderer, and see that values are being written to the buffer, but there is no sound playing.我尝试将有符号和无符号 integer 值写入默认音频端点渲染器的缓冲区,并看到值正在写入缓冲区,但没有声音播放。

To start, I made a header with the needed methods, and a random number generator.首先,我使用所需的方法和随机数生成器制作了 header。

#pragma once

// RNG
#include <random>

// Pseudo-random value generator producing values in [low, high).
//
// FIX: the original computed ((f % (high + low)) + low) -- the range size is
// high - low, not high + low -- so generated values could land far outside
// the requested interval (one reason the rendered "noise" was invalid).
// The raw 64-bit engine output could also be negative after the signed cast,
// so the modulus result is normalised into [0, range) before shifting.
// The non-portable __rdtsc() seed is replaced by std::random_device.
// NOTE: raw modulus is still slightly biased; prefer
// std::uniform_int_distribution for production use.
template <typename T>
class Random {
public:
    // low is the inclusive lower bound, high the exclusive upper bound.
    Random(T low, T high) : mLow(low), mHigh(high), function(std::mt19937_64(std::random_device{}())) {};

    T operator()() {
        // Width of the requested interval; guard against an empty or
        // inverted range so the modulus cannot divide by zero.
        const long long range = static_cast<long long>(mHigh) - static_cast<long long>(mLow);
        if (range <= 0) {
            return mLow;
        }
        long long f = static_cast<long long>(function());
        long long r = f % range;
        if (r < 0) {
            r += range;  // f may be negative after the signed cast
        }
        return static_cast<T>(r + static_cast<long long>(mLow));
    }

private:
    T mLow;                     // inclusive lower bound
    T mHigh;                    // exclusive upper bound
    std::mt19937_64 function;   // 64-bit Mersenne Twister engine
};

// Fills WASAPI render buffers with random noise.  SetFormat() caches the
// endpoint's mix format and allocates a matching Random<> generator;
// LoadData() writes one batch of random samples into a render buffer.
class Noise_Gen {

public:

    // All cached format fields start at zero; NULL is used here as a
    // legacy spelling of scalar zero.
    Noise_Gen() : nChannels(NULL), nSamplesPerSec(NULL), nAvgBytesPerSec(NULL), nByteAlign(NULL), wBitsPerSample(NULL), 
        wValidBitsPerSample(NULL), wSamplesPerBlock(NULL), dwChannelMask(NULL), rd(NULL) {};

    ~Noise_Gen() {
        // NOTE(review): deleting through a void* is undefined behaviour --
        // no destructor runs and the deallocation is not valid C++.  rd
        // should be a typed pointer (or the engine held by value).
        if(rd != NULL) {
            delete rd;
        }
    };

    // Records the device render format and allocates the generator.
    HRESULT SetFormat(WAVEFORMATEX*);

    // Writes bufferFrameCount frames of noise into pData; sets *flags to
    // AUDCLNT_BUFFERFLAGS_SILENT for unsupported sample widths.
    HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags);

private:
    // Type-erased pointer to a Random<T>; the concrete T depends on the
    // sample width chosen in SetFormat.
    void* rd;

    // Fields cached from WAVEFORMATEX (the device mix format).
    WORD nChannels;
    DWORD nSamplesPerSec;
    DWORD nAvgBytesPerSec;
    WORD nByteAlign;      // NOTE(review): holds nBlockAlign; name is misleading
    WORD wBitsPerSample;

    // Extra fields cached from WAVEFORMATEXTENSIBLE (extensible formats only).
    WORD wValidBitsPerSample;
    WORD wSamplesPerBlock;
    DWORD dwChannelMask;
};

Then I added the definitions:然后我添加了定义:

// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include <time.h>

#include "Noise_Gen.h"

// Caches the endpoint mix format and allocates a Random<> generator whose
// value range matches the sample width.  Returns E_NOTIMPL when the
// bytes-per-sample count is unsupported.
//
// NOTE(review): several problems here --
//  * rd is overwritten without freeing a previously allocated generator,
//    so a second SetFormat call leaks.
//  * The SubFormat GUID of an extensible format is never inspected; the
//    shared-mode mix format is typically KSDATAFORMAT_SUBTYPE_IEEE_FLOAT
//    (per the question's update), which expects float samples in [-1, 1],
//    not the integer noise configured below -- the likely reason nothing
//    audible plays.  Confirm against the device's actual SubFormat.
HRESULT Noise_Gen::SetFormat(WAVEFORMATEX* format) {
    // Cache the common WAVEFORMATEX fields.
    nChannels = format->nChannels;
    nSamplesPerSec = format->nSamplesPerSec;
    nAvgBytesPerSec = format->nAvgBytesPerSec;
    nByteAlign = format->nBlockAlign;
    wBitsPerSample = format->wBitsPerSample;
    WORD  wFormatTag = format->wFormatTag;
    if(wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
        // Extensible formats carry extra fields after the base structure.
        WAVEFORMATEXTENSIBLE* pWFE = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(format);
        wValidBitsPerSample = pWFE->Samples.wValidBitsPerSample;
        wSamplesPerBlock = pWFE->Samples.wSamplesPerBlock;
        dwChannelMask = pWFE->dwChannelMask;
    } else {
        wValidBitsPerSample = wBitsPerSample;
    }
    // Maximum representable value for the valid bit depth.
    // NOTE(review): for the signed 4-byte case below, 2^32 - 1 does not fit
    // a signed __int32, so (-amplitude, amplitude) overflows; a signed range
    // should use 2^(bits-1) - 1.
    double amplitude = std::pow(2.0, wValidBitsPerSample) - 1;
    // Choose a generator keyed on bytes per sample (wBitsPerSample / 8).
    switch(wBitsPerSample / 8) {
    case(1):
        rd = new Random<unsigned __int8>(0.0, amplitude);
        break;
    case(2): 
        rd = new Random<unsigned __int16>(0.0, amplitude);
        break;
    case(3):
        // 24-bit samples: generated in a 32-bit type, packed in LoadData.
        rd = new Random<unsigned __int32>(0.0, amplitude);
        break;
    case(4): 
        rd = new Random<signed __int32>(-amplitude, amplitude);
        break;
    case(5): 
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    case(6):
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    case(7): 
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    case(8):
        rd = new Random<unsigned __int64>(0.0, amplitude);
        break;
    default:
        return E_NOTIMPL;
    }
    return S_OK;
}

// Fills pData with one batch of noise samples.
// (The size of an audio frame in bytes = nChannels * wBitsPerSample / 8;
// the loop runs once per sample, i.e. nChannels * bufferFrameCount times.)
//
// NOTE(review): the shared-mode mix format is usually 32-bit IEEE float
// (see the question's update).  Case 4 then writes full-range integer noise
// into a buffer the engine interprets as floats in [-1, 1], which decodes
// to invalid/denormal values -- the probable cause of the silent output.
HRESULT Noise_Gen::LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
    for(UINT32 i = 0; i < nChannels *bufferFrameCount; i++) {
        // Dispatch on bytes per sample (re-evaluated every iteration).
        switch(wBitsPerSample / 8) {
        case(1):
            pData[i] = (((Random<unsigned __int8>*)rd)->operator()());
            break;
        case(2):{
            unsigned __int16* pData2 = (unsigned __int16*) pData;
            pData2[i] = (((Random<unsigned __int16>*)rd)->operator()());
            break;
        }
        case(3): {
            // 24-bit: copy the low three bytes (little-endian order).
            __int32 data = ((Random<unsigned __int32>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) (&data);
            pData[(3 * i)] = cp[0];
            pData[1 + (3 * i)] = cp[1];
            pData[2 + (3 * i)] = cp[2];
            break;
        }
        case(4):{
            signed __int32* pData2 = (signed __int32*) pData;
            pData2[i] = (((Random<signed __int32>*)rd)->operator()());
            break;
        }
        case(5): {
            // 40-bit: copy the low five bytes (little-endian order).
            __int64 data = ((Random<unsigned __int64>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) &data;
            pData[(5 * i)] = cp[0];
            pData[1 + (5 * i)] = cp[1];
            pData[2 + (5 * i)] = cp[2];
            pData[3 + (5 * i)] = cp[3];
            pData[4 + (5 * i)] = cp[4];
            break;
        }
        case(6): {
            // 48-bit: copy the low six bytes (little-endian order).
            __int64 data = ((Random<unsigned __int64>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) &data;
            pData[(6 * i)] = cp[0];
            pData[1 + (6 * i)] = cp[1];
            pData[2 + (6 * i)] = cp[2];
            pData[3 + (6 * i)] = cp[3];
            pData[4 + (6 * i)] = cp[4];
            pData[5 + (6 * i)] = cp[5];
            break;
        }
        case(7): {
            // 56-bit: copy the low seven bytes (little-endian order).
            __int64 data = ((Random<unsigned __int64>*)rd)->operator()();
            unsigned char* cp = (unsigned char*) &data;
            pData[(7 * i)] = cp[0];
            pData[1 + (7 * i)] = cp[1];
            pData[2 + (7 * i)] = cp[2];
            pData[3 + (7 * i)] = cp[3];
            pData[4 + (7 * i)] = cp[4];
            pData[5 + (7 * i)] = cp[5];
            pData[6 + (7 * i)] = cp[6];
            break;
        }
        case(8): {
            unsigned __int64* pData2 = (unsigned __int64*) pData;
            pData2[i] = (((Random<unsigned __int64>*)rd)->operator()());
            break;
        }
        default:
            // For stopping playback
            (*flags) = AUDCLNT_BUFFERFLAGS_SILENT;
            return E_NOTIMPL;
        }
    }
    return S_OK;
}

Then I added my class to the template provided by Microsoft and printed the default audio endpoint renderer to the console.然后我将我的 class 添加到 Microsoft 提供的模板中,并将默认音频端点渲染器打印到控制台。

#include <InitGuid.h>
#include <iostream>
#include <Windows.h>
#include <dshow.h>

// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>

// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include "Noise_Gen.h"

//-----------------------------------------------------------
// Play an audio stream on the default audio rendering
// device. The PlayAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data to the
// rendering device. The inner loop runs every 1/2 second.
//-----------------------------------------------------------

// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

#define EXIT_ON_ERROR(hres)  \
              if (FAILED(hres)) { goto Exit; }
#define SAFE_RELEASE(punk)  \
              if ((punk) != NULL)  \
                { (punk)->Release(); (punk) = NULL; }

const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioRenderClient = __uuidof(IAudioRenderClient);

// Renders noise from pMySource to the default audio endpoint in shared
// mode.  Allocates a ~1 second buffer and refills it roughly every half
// buffer.  Returns the first failing HRESULT, or the last HRESULT on the
// success path.  All COM interfaces are released at Exit.
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;
    UINT32 numFramesPadding;
    BYTE* pData;
    DWORD flags = 0;
    IPropertyStore* pPropertyStore = NULL;
    PROPVARIANT name;

    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL,
                          CLSCTX_ALL, IID_IMMDeviceEnumerator,
                          (void**) &pEnumerator);
    EXIT_ON_ERROR(hr);
    hr = pEnumerator->GetDefaultAudioEndpoint(
        eRender, eConsole, &pDevice);
    // FIX: this HRESULT was previously unchecked; pDevice was dereferenced
    // below even if no default endpoint exists.
    EXIT_ON_ERROR(hr);

    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    // FIX: check before using pPropertyStore.
    EXIT_ON_ERROR(hr);
    PropVariantInit(&name);
    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    // FIX: check BEFORE printing -- name.pwszVal is not valid on failure.
    EXIT_ON_ERROR(hr);
    // Show which endpoint we are rendering to.
    printf("%S", name.pwszVal);
    printf("\n");
    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL,
                           NULL, (void**) &pAudioClient);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                  0, hnsRequestedDuration,
                                  0, pwfx, NULL);
    EXIT_ON_ERROR(hr);
    // Tell the audio source which format to use.
    hr = pMySource->SetFormat(pwfx);
    EXIT_ON_ERROR(hr);
    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetService(IID_IAudioRenderClient,
                                  (void**) &pRenderClient);
    EXIT_ON_ERROR(hr);
    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);
    // Load the initial data into the shared buffer.
    hr = pMySource->LoadData(bufferFrameCount, pData, &flags);
    EXIT_ON_ERROR(hr);
    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);
    // Calculate the actual duration of the allocated buffer
    // (computed in double to avoid intermediate overflow).
    hnsActualDuration = (double) REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);
    // Each loop fills about half of the shared buffer.
    while(flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration.
        Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
        // See how much buffer space is available.
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);
        numFramesAvailable = bufferFrameCount - numFramesPadding;
        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);
        // Get next 1/2-second of data from the audio source.
        hr = pMySource->LoadData(numFramesAvailable, pData, &flags);
        EXIT_ON_ERROR(hr);
        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }
    // Wait for last data in buffer to play before stopping.
    Sleep((DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2));
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);
Exit:
    CoTaskMemFree(pwfx);
    // FIX: pPropertyStore was previously leaked (never released).
    SAFE_RELEASE(pPropertyStore);
    SAFE_RELEASE(pEnumerator);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pRenderClient);
    return hr;
}

// Entry point: initialise COM, run the render loop, clean up.
int main() {
    HRESULT hr = CoInitialize(nullptr);
    if(FAILED(hr)) { return hr; }
    // FIX: stack allocation replaces the raw new/delete pair (which would
    // have leaked had PlayAudioStream thrown).
    Noise_Gen ng;
    // FIX: the result of PlayAudioStream was previously discarded.
    hr = PlayAudioStream(&ng);
    CoUninitialize();
    return FAILED(hr) ? 1 : 0;
}

The default audio endpoint renderer on my system uses 32 bit values, so the code started by writing unsigned 32 bit values to the buffer.我系统上的默认音频端点渲染器使用 32 位值,因此代码通过将无符号 32 位值写入缓冲区开始。 I then tried to use signed values, which can be seen in the code above.然后我尝试使用有符号值,可以在上面的代码中看到。 No sound was played in both these cases.在这两种情况下都没有播放声音。 I checked the contents of the buffer while debugging and they do change.我在调试时检查了缓冲区的内容,它们确实发生了变化。 I printed the default audio endpoint renderer to the console, and it is my system's speaker.我将默认音频端点渲染器打印到控制台,它是我系统的扬声器。 Windows even shows my app in the Volume mixer, but there is no sound showing even with the volume all the way up. Windows 甚至在音量混音器中显示我的应用程序,但即使音量一直升高也没有声音显示。 I then checked the sleep time to be sure it was sleeping so the system had access to the buffer, and it does sleep for 500ms between writes to the buffer.然后我检查了睡眠时间以确保它正在睡眠,因此系统可以访问缓冲区,并且它确实在写入缓冲区之间睡眠了 500 毫秒。

Update: I found out I am using the KSDATAFORMAT_SUBTYPE_IEEE_FLOAT subformat and have tried feeding the buffer floats in the -amplitude to amplitude range, the 0 to amplitude range, the -1 to 1 range, and the 0 to 1 range.更新:我发现我正在使用 KSDATAFORMAT_SUBTYPE_IEEE_FLOAT 子格式,并尝试在 -amplitude 到振幅范围、0 到振幅范围、-1 到 1 范围和 0 到 1 范围内提供缓冲区浮点数。

What am I missing?我错过了什么?

Your random number distribution code does not work correctly for floating point formats (which is basically always going to be the mix format in shared mode as far as I know).您的随机数分布代码不适用于浮点格式(据我所知,这基本上总是共享模式下的混合格式)。

It's wrong even for integers.即使是整数也是错误的。 I assume you meant to write我假设你打算写

((f  % ((signed __int64) mHigh - (signed __int64) mLow)) + (signed __int64) mLow); 

(note the minus), but you should not use raw modulus anyway because it's slightly biased. (注意减号),但无论如何你都不应该使用原始模数,因为它有点偏差。

For floating point formats you always use the -1 to 1 range.对于浮点格式,您始终使用 -1 到 1 的范围。

I have adapted your code to use std::uniform_real_distribution and I get noise playing on my speakers.我已经修改了您的代码以使用 std::uniform_real_distribution 并且在我的扬声器上播放噪音。

#include <cstdio>
#include <Windows.h>

// Windows multimedia device
#include <Mmdeviceapi.h>
#include <Functiondiscoverykeys_devpkey.h>

// WASAPI
#include <Audiopolicy.h>
#include <Audioclient.h>

#include <random>


class Noise_Gen {
public:
    Noise_Gen() : format(), engine(__rdtsc()), float_dist(-1.f, 1.f) {};

    void SetFormat(WAVEFORMATEX* wfex) {
        if(wfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
            format = *reinterpret_cast<WAVEFORMATEXTENSIBLE*>(wfex);
        } else {
            format.Format = *wfex;
            format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
            INIT_WAVEFORMATEX_GUID(&format.SubFormat, wfex->wFormatTag);
            format.Samples.wValidBitsPerSample = format.Format.wBitsPerSample;
            format.dwChannelMask = 0;
        }
    }

    // (The size of an audio frame = nChannels * wBitsPerSample)
    void FillBuffer(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags) {
        const UINT16 formatTag = EXTRACT_WAVEFORMATEX_ID(&format.SubFormat);
        if(formatTag == WAVE_FORMAT_IEEE_FLOAT) {
            float* fData = (float*)pData;
            for(UINT32 i = 0; i < format.Format.nChannels * bufferFrameCount; i++) {
                fData[i] = float_dist(engine);
            }
        } else if(formatTag == WAVE_FORMAT_PCM) {
            using rndT = decltype(engine)::result_type;
            UINT32 iterations = format.Format.nBlockAlign * bufferFrameCount / sizeof(rndT);
            UINT32 leftoverBytes = format.Format.nBlockAlign * bufferFrameCount % sizeof(rndT);
            rndT* iData = (rndT*)pData;
            UINT32 i = 0;
            for(; i < iterations; i++) {
                iData[i] = engine();
            }
            if(leftoverBytes != 0) {
                rndT lastRnd = engine();
                BYTE* pLastBytes = pData + i * sizeof(rndT);
                for(UINT32 j = 0; j < leftoverBytes; ++j) {
                    pLastBytes[j] = lastRnd >> (j * 8) & 0xFF;
                }
            }
        } else {
            //memset(pData, 0, wfex.Format.nBlockAlign * bufferFrameCount);
            *flags = AUDCLNT_BUFFERFLAGS_SILENT;
        }
    }

private:
    WAVEFORMATEXTENSIBLE format;

    std::mt19937_64 engine;
    std::uniform_real_distribution<float> float_dist;
};

// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC  10000000ll
#define REFTIMES_PER_MILLISEC  10000

#define EXIT_ON_ERROR(hres)  \
              if (FAILED(hres)) { goto Exit; }
#define SAFE_RELEASE(punk)  \
              if ((punk) != NULL)  \
                { (punk)->Release(); (punk) = NULL; }

// Renders noise from pMySource to the default endpoint in shared mode.
// Structure follows Microsoft's "Rendering a Stream" sample: any failing
// HRESULT jumps to Exit, where every COM interface is released.
// NOTE(review): the PROPVARIANT 'name' is never passed to
// PropVariantClear, so the friendly-name string it holds leaks.
HRESULT PlayAudioStream(Noise_Gen* pMySource) {
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IPropertyStore* pPropertyStore = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioRenderClient* pRenderClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 bufferFrameCount;
    BYTE* pData;
    DWORD flags = 0;
    PROPVARIANT name;

    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL,
        CLSCTX_ALL, IID_PPV_ARGS(&pEnumerator));
    EXIT_ON_ERROR(hr);
    hr = pEnumerator->GetDefaultAudioEndpoint(
        eRender, eConsole, &pDevice);
    EXIT_ON_ERROR(hr);

    // Print the friendly name of the endpoint we are about to use.
    hr = pDevice->OpenPropertyStore(STGM_READ, &pPropertyStore);
    EXIT_ON_ERROR(hr);
    PropVariantInit(&name);
    hr = pPropertyStore->GetValue(PKEY_Device_FriendlyName, &name);
    EXIT_ON_ERROR(hr);
    printf("%S", name.pwszVal);
    printf("\n");
    hr = pDevice->Activate(__uuidof(pAudioClient), CLSCTX_ALL,
        NULL, (void**) &pAudioClient);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);

    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
        0, hnsRequestedDuration,
        0, pwfx, NULL);
    EXIT_ON_ERROR(hr);
    // Tell the audio source which format to use.
    pMySource->SetFormat(pwfx);
    // Get the actual size of the allocated buffer.
    hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr);
    hr = pAudioClient->GetService(IID_PPV_ARGS(&pRenderClient));
    EXIT_ON_ERROR(hr);
    // Grab the entire buffer for the initial fill operation.
    hr = pRenderClient->GetBuffer(bufferFrameCount, &pData);
    EXIT_ON_ERROR(hr);

    // Load the initial data into the shared buffer.
    pMySource->FillBuffer(bufferFrameCount, pData, &flags);    

    hr = pRenderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr);
    // Calculate the actual duration of the allocated buffer
    // (REFTIMES_PER_SEC is a long long literal, so this math is 64-bit).
    hnsActualDuration = REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
    hr = pAudioClient->Start();  // Start playing.
    EXIT_ON_ERROR(hr);
    // Each loop fills about half of the shared buffer.
    DWORD sleepTime;
    while(flags != AUDCLNT_BUFFERFLAGS_SILENT) {
        // Sleep for half the buffer duration (skip Sleep(0) for tiny buffers).
        sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
        if(sleepTime != 0)
            Sleep(sleepTime);
        // See how much buffer space is available.
        UINT32 numFramesPadding;
        hr = pAudioClient->GetCurrentPadding(&numFramesPadding);
        EXIT_ON_ERROR(hr);

        UINT32 numFramesAvailable = bufferFrameCount - numFramesPadding;
        // Grab all the available space in the shared buffer.
        hr = pRenderClient->GetBuffer(numFramesAvailable, &pData);
        EXIT_ON_ERROR(hr);

        // Get next 1/2-second of data from the audio source.
        pMySource->FillBuffer(numFramesAvailable, pData, &flags);

        hr = pRenderClient->ReleaseBuffer(numFramesAvailable, flags);
        EXIT_ON_ERROR(hr);
    }
    // Wait for last data in buffer to play before stopping.
    sleepTime = (DWORD) (hnsActualDuration / REFTIMES_PER_MILLISEC / 2);
    if(sleepTime != 0)
        Sleep(sleepTime);
    hr = pAudioClient->Stop();  // Stop playing.
    EXIT_ON_ERROR(hr);

Exit:
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pRenderClient);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pPropertyStore); // the original question's code leaked this
    SAFE_RELEASE(pEnumerator);
    return hr;
}

int main() {
    HRESULT hr = CoInitialize(nullptr);
    if(FAILED(hr)) { return hr; }

    Noise_Gen ng;
    PlayAudioStream(&ng);
    
    CoUninitialize();
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM