
Trying to create a wav file with "Capturing a Stream" from Win32 WASAPI C++

I'm having trouble interpreting the audio data (BYTE* pData) that the GetBuffer function returns. I'm attempting to write audio captured from my mic into a wav file that I'm constructing myself, all for the purpose of better understanding audio devices, audio data, and audio formats.

Below is all of the code, most of which is taken from or based on the Windows docs: https://docs.microsoft.com/en-us/windows/win32/coreaudio/capturing-a-stream . I'm trying to keep things as simple as possible, nothing fancy. This code captures a few seconds of mic audio, which you can hear distorted and really static-y. Is the distortion due to how I'm writing the pData contents into the file?

Main.cpp Note - please ignore the cout calls all over the place; they are only for debugging.

#pragma once
#include "MyAudioSink.h"
#include <windows.h>

// REFERENCE_TIME time units per second and per millisecond
#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

#define EXIT_ON_ERROR(hres)  \
              if (FAILED(hres)) { goto Exit; }
#define SAFE_RELEASE(punk)  \
              if ((punk) != NULL)  \
                { (punk)->Release(); (punk) = NULL; }



const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
const IID IID_IAudioClient = __uuidof(IAudioClient);
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);

HRESULT RecordAudioStream(MyAudioSink * pMySink);


int main() {

    HRESULT hr;
    hr = CoInitialize(nullptr);

    //declare MyAudioSink object
    MyAudioSink pMySink;

    hr = RecordAudioStream(&pMySink);

    cout << "done";

}


//-----------------------------------------------------------
// Record an audio stream from the default audio capture
// device. The RecordAudioStream function allocates a shared
// buffer big enough to hold one second of PCM audio data.
// The function uses this buffer to stream data from the
// capture device. The main loop runs every 1/2 second.
//-----------------------------------------------------------



HRESULT RecordAudioStream(MyAudioSink* pMySink)
{
    HRESULT hr;
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;
    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BOOL bDone = FALSE;
    BYTE* pData;
    DWORD flags;

    cout << "starting...";

    hr = CoCreateInstance(
        CLSID_MMDeviceEnumerator, NULL,
        CLSCTX_ALL, IID_IMMDeviceEnumerator,
        (void**)&pEnumerator);

    cout << "test1" ;
    EXIT_ON_ERROR(hr)

        hr = pEnumerator->GetDefaultAudioEndpoint(
            eCapture, eConsole, &pDevice);
    cout << "test2" ;
    EXIT_ON_ERROR(hr)

        hr = pDevice->Activate(
            IID_IAudioClient, CLSCTX_ALL,
            NULL, (void**)&pAudioClient);
    cout << "test3" ;
    EXIT_ON_ERROR(hr)

        hr = pAudioClient->GetMixFormat(&pwfx);
    cout << "test4" ;
    EXIT_ON_ERROR(hr)

        hr = pAudioClient->Initialize(
            AUDCLNT_SHAREMODE_SHARED,
            0,
            hnsRequestedDuration,
            0,
            pwfx,
            NULL);
    cout << "test5" ;
    EXIT_ON_ERROR(hr)

        // Get the size of the allocated buffer.
        hr = pAudioClient->GetBufferSize(&bufferFrameCount);
    cout << "test6" ;
    EXIT_ON_ERROR(hr)

        hr = pAudioClient->GetService(
            IID_IAudioCaptureClient,
            (void**)&pCaptureClient);
    cout << "test7" ;
    EXIT_ON_ERROR(hr)

        // Calculate the actual duration of the allocated buffer.
        hnsActualDuration = (double)REFTIMES_PER_SEC *
        bufferFrameCount / pwfx->nSamplesPerSec;

    // Notify the audio sink which format to use.
    hr = pMySink->SetFormat(pwfx);
    cout << "test8" ;
    EXIT_ON_ERROR(hr)

        //initialize the wav file with the specifications set by SetFormat
        hr = pMySink->_Initialize_File();
    cout << "test9" ;
    EXIT_ON_ERROR(hr)

        hr = pAudioClient->Start();  // Start recording.
    cout << "test10" ;
    EXIT_ON_ERROR(hr)

        cout << "about to run while...";

        // Each loop fills about half of the shared buffer.
        while (bDone == FALSE)
        {

            // Sleep for half the buffer duration.
            Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 2);

            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            EXIT_ON_ERROR(hr)

                while (packetLength != 0)
                {
                    // Get the available data in the shared buffer.
                    hr = pCaptureClient->GetBuffer(
                        &pData,
                        &numFramesAvailable,
                        &flags, NULL, NULL);
                    EXIT_ON_ERROR(hr)

                        if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
                        {
                            cout << "silent";
                            pData = NULL;  // Tell CopyData to write silence.
                        }

                    // Copy the available capture data to the audio sink.
                    hr = pMySink->CopyData(
                        pData, numFramesAvailable, &bDone);
                    EXIT_ON_ERROR(hr)

                        hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
                    EXIT_ON_ERROR(hr)

                        hr = pCaptureClient->GetNextPacketSize(&packetLength);
                    EXIT_ON_ERROR(hr)
                }
        }
    hr = pMySink->_File_WrapUp();
    EXIT_ON_ERROR(hr)

        hr = pAudioClient->Stop();  // Stop recording.
    EXIT_ON_ERROR(hr)

        Exit:
    CoTaskMemFree(pwfx);
    SAFE_RELEASE(pEnumerator)
        SAFE_RELEASE(pDevice)
        SAFE_RELEASE(pAudioClient)
        SAFE_RELEASE(pCaptureClient)

        return hr;
}

MyAudioSink.cpp Note ** - this is where the issue is. You may notice that a function called write_word is what initializes the wav file header with all of the audio format parameters; however, I haven't been able to figure out how to use it to write the pData contents, so I tried the ostream write function instead. That has yielded the best results so far (I can hear my voice), but it sounds extremely static-y and distorted.
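As a point of reference, here is a minimal sketch of how write_word could be applied to the sample data instead of ostream::write, assuming purely for illustration that the frames really were interleaved 16-bit integer PCM (WriteFramesAsPcm16 is a hypothetical helper, not part of the original code). On a little-endian machine it emits exactly the same bytes as the mainFile.write call in CopyData, so the choice between the two functions is not, by itself, the source of the distortion.

    // Hypothetical sketch: write each 16-bit sample with write_word instead of
    // ostream::write. Assumes pData holds interleaved 16-bit integer PCM frames.
    void WriteFramesAsPcm16(std::ostream& outs, const BYTE* pData,
                            UINT32 numFramesAvailable, WORD nChannels)
    {
        const INT16* samples = reinterpret_cast<const INT16*>(pData);
        for (UINT32 frame = 0; frame < numFramesAvailable; ++frame)
            for (WORD ch = 0; ch < nChannels; ++ch)
                little_endian_io::write_word(outs, *samples++, 2);  // one sample, little-endian
    }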

#pragma once

#include "MyAudioSink.h"
#include <string.h>


namespace little_endian_io
{
    template <typename Word>
    std::ostream& write_word(std::ostream& outs, Word value, unsigned size = sizeof(Word))
    {

        for (; size; --size, value >>= 8)
            outs.put(static_cast <char> (value & 0xFF));
        return outs;
    }
}
using namespace little_endian_io;

HRESULT MyAudioSink::_Initialize_File() {



    cout << "initializing file";

    // prepare our wav file
    mainFile.open("example.wav", ios::out | ios::binary);

    // Write the file headers and sound format
    mainFile << "RIFF----WAVEfmt ";     // (chunk size to be filled in later)
    write_word(mainFile, 16, 4);  // no extension data
    write_word(mainFile, 1, 2);  // PCM - integer samples
    write_word(mainFile, nChannels, 2);  // number of channels
    write_word(mainFile, nSamplesPerSec, 4);  // samples per second (Hz)
    write_word(mainFile, nAvgBytesPerSec, 4);  // (Sample Rate * BitsPerSample * Channels) / 8
    write_word(mainFile, nBlockAlign, 2);  // data block size in bytes (one frame: all channels of one sample)
    write_word(mainFile, wBitsPerSample, 2);  // number of bits per sample

    // Write the data chunk header
    data_chunk_pos = mainFile.tellp();
    mainFile << "data----";  // (chunk size to be filled in later)..

    //start by setting our complete variable to False, main func will turn to true
    bComplete = false;
    //testing
    test = 0;

    return S_OK;

}

HRESULT MyAudioSink::SetFormat(WAVEFORMATEX* pwfx) {



    //Update our format variables
    wFormatTag = pwfx->wFormatTag;
    nChannels = pwfx->nChannels;
    nSamplesPerSec = pwfx->nSamplesPerSec;
    nAvgBytesPerSec = pwfx->nAvgBytesPerSec;
    nBlockAlign = pwfx->nBlockAlign;
    wBitsPerSample = pwfx->wBitsPerSample;
    cbSize = pwfx->cbSize;

    return S_OK;

}

HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 numFramesAvailable, BOOL* bDone) {
    //TODO

    //forgot how to do this part, figure it out
    for (int i = 0; i < numFramesAvailable; i++) {
        mainFile.write((const char*) pData+(i* nBlockAlign), nBlockAlign);
    }


    //test
    test++;
    if (test >= nBlockAlign * 120) bComplete = true;

    //check if our main function is done to finish capture
    if (bComplete) *bDone = true;


    return S_OK;
}

HRESULT MyAudioSink::_File_WrapUp() {



    // (We'll need the final file size to fix the chunk sizes above)
    file_length = mainFile.tellp();

    // Fix the data chunk header to contain the data size
    // (the audio data starts 8 bytes past data_chunk_pos, and the size field is exactly 4 bytes wide)
    mainFile.seekp(data_chunk_pos + 4);
    write_word(mainFile, file_length - data_chunk_pos - 8, 4);

    // Fix the file header to contain the proper RIFF chunk size, which is (file size - 8) bytes
    mainFile.seekp(0 + 4);
    write_word(mainFile, file_length - 8, 4);

    mainFile.close();

    cout << "finalized file";

    return S_OK;
}



MyAudioSink.h

#pragma once

//
#include <audioclient.h>
#include <Mmdeviceapi.h>
#include <fstream>
#include <iostream>
#include <cmath>



using namespace std;

class MyAudioSink
{

private:

    size_t data_chunk_pos;
    size_t file_length;
    ofstream mainFile;

    //sample format
    WORD  wFormatTag;
    WORD  nChannels;
    DWORD nSamplesPerSec;
    DWORD nAvgBytesPerSec;
    WORD  nBlockAlign;
    WORD  wBitsPerSample;
    WORD  cbSize;
    int test;

public:

    bool bComplete;

    HRESULT _Initialize_File();
    HRESULT SetFormat(WAVEFORMATEX* pwfx);
    HRESULT CopyData(BYTE* pData, UINT32 numFramesAvailable, BOOL* bDone);
    HRESULT _File_WrapUp();
};


The problem, I suspect, is that your program only handles the PCM format, not the extensible format. The resulting header will not match the WAVE specification.

Add this code to confirm:

    pAudioClient->GetMixFormat(&pwfx);

    switch(pwfx->wFormatTag)
    {
        case WAVE_FORMAT_PCM:
            cout << "WAVE_FORMAT_PCM";
            break;

        case WAVE_FORMAT_IEEE_FLOAT:
            cout << "WAVE_FORMAT_IEEE_FLOAT";
            break;

        case WAVE_FORMAT_EXTENSIBLE:
            cout << "WAVE_FORMAT_EXTENSIBLE";

            WAVEFORMATEXTENSIBLE *pWaveFormatExtensible = reinterpret_cast<WAVEFORMATEXTENSIBLE *>(pwfx);

            if(pWaveFormatExtensible->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
            {
                cout << "KSDATAFORMAT_SUBTYPE_PCM";
            }
            else if(pWaveFormatExtensible->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
            {
                cout << "KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
            }
            break;
    }

I think the most common case will be WAVE_FORMAT_EXTENSIBLE with KSDATAFORMAT_SUBTYPE_IEEE_FLOAT...
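To illustrate that case, here is a minimal sketch under the assumption that GetMixFormat reports 32-bit IEEE float (the helper name and signature are mine, not from your code): each sample arrives as a float in [-1.0, 1.0], so one option is to convert every captured packet to 16-bit integer PCM before writing it, which keeps the PCM header written in _Initialize_File truthful.

    // Minimal sketch, assuming the shared-mode mix format is 32-bit IEEE float:
    // convert one GetBuffer packet to 16-bit PCM and append it to the open file.
    // pData == NULL (AUDCLNT_BUFFERFLAGS_SILENT) is written as silence.
    void AppendFloatPacketAsPcm16(std::ofstream& out, const BYTE* pData,
                                  UINT32 numFramesAvailable, WORD nChannels)
    {
        const float* in = reinterpret_cast<const float*>(pData);
        for (UINT32 i = 0; i < numFramesAvailable * nChannels; ++i)
        {
            float s = (pData != NULL) ? in[i] : 0.0f;
            if (s > 1.0f)  s = 1.0f;   // clamp before scaling
            if (s < -1.0f) s = -1.0f;
            INT16 pcm = static_cast<INT16>(s * 32767.0f);
            little_endian_io::write_word(out, pcm, 2);  // 16-bit little-endian sample
        }
    }

If you convert like this, the fmt chunk must describe the converted data (16 bits per sample, nBlockAlign = nChannels * 2, nAvgBytesPerSec = nSamplesPerSec * nBlockAlign), not the values returned by GetMixFormat. The other option is to leave the captured bytes untouched and write a WAVE_FORMAT_IEEE_FLOAT header instead (format tag 3, 32 bits per sample).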

EDIT

I've made a quick sample here: WasapiCapture
