I am trying to decode even just a single H264 frame with the H264 Decoder MFT, but I've been having problems with ProcessOutput(). I've reduced the bad HRESULTs as much as I can, but I'm currently stuck on dealing with MF_E_TRANSFORM_STREAM_CHANGE. This occurs after I set pSample to my allocated output_sample and call ProcessOutput(), since this decoder requires that you allocate your own sample. I tried resetting the output type using SetOutputType() to what I had in my configure_decoder() function, but alas I get a bad HRESULT. Not sure what to do next.
// libs
#pragma comment(lib, "D3D11.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "evr.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "Winmm.lib")
// std
#include <iostream>
#include <string>
#include <fstream>
// Windows
#include <windows.h>
#include <atlbase.h>
// DirectX
#include <d3d11.h>
// Media Foundation
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
// Others
#include <chrono>
#include <thread>
#include <direct.h> // for mkdir()
#include <Codecapi.h> // for CODECAPI_AVDecVideoAcceleration_H264
#include <comdef.h>
// Custom
// Constants
constexpr UINT decode_width = 1920;
constexpr UINT decode_height = 1080;
/// Initializes COM (apartment-threaded) on the calling thread and then starts
/// Media Foundation.
///
/// @return The HRESULT of MFStartup when both steps succeed; otherwise the
///         failing HRESULT from CoInitializeEx or MFStartup.
///
/// If MFStartup fails, the COM initialization performed here is undone before
/// returning, so a failed call leaves nothing behind for the caller to clean up.
HRESULT init_mf()
{
    HRESULT hr = CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
    if (FAILED(hr))
        return hr;

    hr = MFStartup(MF_VERSION);
    if (FAILED(hr))
    {
        // Every successful CoInitializeEx has to be paired with CoUninitialize;
        // without this the COM reference taken above would be leaked.
        CoUninitialize();
        return hr;
    }

    std::cout << "- Initialized Media Foundation" << std::endl;
    return hr;
}
/// Creates a hardware Direct3D 11 device with video support, together with its
/// immediate context.
///
/// @param out_device  Receives the created device.
/// @param in_context  Receives the device's immediate context (an output,
///                    despite the "in_" prefix).
/// @return The HRESULT returned by D3D11CreateDevice.
HRESULT init_dxgi(CComPtr<ID3D11Device>& out_device, CComPtr<ID3D11DeviceContext>& in_context)
{
    // D3D11_CREATE_DEVICE_DEBUG can be OR-ed in here while debugging.
    const UINT creation_flags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;

    const HRESULT result = D3D11CreateDevice(
        nullptr,                   // no explicit adapter
        D3D_DRIVER_TYPE_HARDWARE,
        nullptr,                   // no software rasterizer module
        creation_flags,
        nullptr, 0,                // no explicit feature-level list
        D3D11_SDK_VERSION,
        &out_device,
        nullptr,                   // the obtained feature level is not needed
        &in_context);

    if (SUCCEEDED(result))
        std::cout << "- Initialized DXGI" << std::endl;

    return result;
}
/// Finds a synchronous H.264 -> NV12 video decoder MFT, activates the first
/// match and fetches its attribute store.
///
/// @param out_transform   Receives the activated decoder transform.
/// @param out_activate    Receives the activation object of the chosen decoder
///                        (the caller reads the friendly name from it).
/// @param out_attributes  Receives the decoder's attribute store.
/// @return S_OK on success; MF_E_TOPO_CODEC_NOT_FOUND when the enumeration
///         succeeds but yields no decoder; otherwise the failing HRESULT from
///         MFTEnum2, ActivateObject or GetAttributes.
HRESULT get_decoder(CComPtr<IMFTransform>& out_transform, CComPtr<IMFActivate>& out_activate,
                    CComPtr<IMFAttributes>& out_attributes)
{
    // Input & output types the decoder has to support
    const MFT_REGISTER_TYPE_INFO in_info = { MFMediaType_Video, MFVideoFormat_H264 };
    const MFT_REGISTER_TYPE_INFO out_info = { MFMediaType_Video, MFVideoFormat_NV12 };

    // Get decoders matching the specified types. The returned array is freed by
    // CComHeapPtr (CoTaskMemFree); the interface pointers inside it are ours to release.
    CComHeapPtr<IMFActivate*> activate_raw;
    uint32_t activateCount = 0;
    HRESULT hr = MFTEnum2(MFT_CATEGORY_VIDEO_DECODER, MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_SORTANDFILTER,
                          &in_info, &out_info, nullptr, &activate_raw, &activateCount);
    if (FAILED(hr))
        return hr;

    // The enumeration succeeds with an empty result when nothing matches;
    // indexing the array in that case would dereference a null pointer.
    if (activateCount == 0 || activate_raw.m_pData == nullptr)
        return MF_E_TOPO_CODEC_NOT_FOUND;

    // Choose the first returned decoder. Attach() takes over the reference the
    // array already holds instead of adding a second one that nobody releases.
    out_activate.Attach(activate_raw[0]);

    // Release the candidates that are not used
    for (uint32_t i = 1; i < activateCount; ++i)
        activate_raw[i]->Release();

    // Drop anything the caller may have left in the output pointers so that
    // taking their address below is valid.
    out_transform.Release();
    out_attributes.Release();

    // Activate
    if (FAILED(hr = out_activate->ActivateObject(IID_PPV_ARGS(&out_transform))))
        return hr;

    // Get attributes
    if (FAILED(hr = out_transform->GetAttributes(&out_attributes)))
        return hr;

    std::cout << "- get_decoder() Found " << activateCount << " decoders" << std::endl;
    return hr;
}
/// Sets the decoder's input type (H.264) and output type (NV12) for a
/// decode_width x decode_height, 60 fps stream.
///
/// @param in_transform        Decoder transform to configure.
/// @param in_device_manager   DXGI device manager; currently unused because the
///                            MFT_MESSAGE_SET_D3D_MANAGER call is disabled.
/// @param in_input_stream_id  Identifier of the decoder's input stream.
/// @param output_stream_id    Identifier of the decoder's output stream.
/// @return The HRESULT of the first failing call, or the result of
///         SetOutputType when every step succeeds.
HRESULT configure_decoder(const CComPtr<IMFTransform>& in_transform, CComPtr<IMFDXGIDeviceManager>& in_device_manager,
                          const DWORD in_input_stream_id, const DWORD output_stream_id
)
{
    // Disabled: hand the Direct3D device manager to the decoder for DXVA.
    /*if (FAILED(hr = in_transform->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(in_device_manager.p))))
    return hr;*/

    // --- Input type: start from the decoder's first proposal and overwrite the
    // --- fields this sample cares about.
    CComPtr<IMFMediaType> input_type;
    HRESULT hr = in_transform->GetInputAvailableType(in_input_stream_id, 0, &input_type);
    if (SUCCEEDED(hr))
        hr = input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (SUCCEEDED(hr))
        hr = input_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
    if (SUCCEEDED(hr))
        hr = input_type->SetUINT32(MF_MT_AVG_BITRATE, 30000000);
    if (SUCCEEDED(hr))
        hr = MFSetAttributeSize(input_type, MF_MT_FRAME_SIZE, decode_width, decode_height);
    if (SUCCEEDED(hr))
        hr = MFSetAttributeRatio(input_type, MF_MT_FRAME_RATE, 60, 1);
    if (SUCCEEDED(hr))
        hr = input_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlaceMode::MFVideoInterlace_Progressive);
    if (SUCCEEDED(hr))
        hr = in_transform->SetInputType(in_input_stream_id, input_type, 0);
    if (FAILED(hr))
        return hr;

    // --- Output type: built from scratch. MF_MT_INTERLACE_MODE is deliberately
    // --- left unset here (it was disabled in the original sample).
    CComPtr<IMFMediaType> output_type;
    hr = MFCreateMediaType(&output_type);
    if (SUCCEEDED(hr))
        hr = output_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (SUCCEEDED(hr))
        hr = output_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
    if (SUCCEEDED(hr))
        hr = MFSetAttributeSize(output_type, MF_MT_FRAME_SIZE, decode_width, decode_height);
    if (SUCCEEDED(hr))
        hr = MFSetAttributeRatio(output_type, MF_MT_FRAME_RATE, 60, 1);
    if (SUCCEEDED(hr))
        hr = MFSetAttributeRatio(output_type, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    if (SUCCEEDED(hr))
        hr = in_transform->SetOutputType(output_stream_id, output_type, 0);
    if (FAILED(hr))
        return hr;

    std::cout << "- Set decoder configuration" << std::endl;

    // Disabled: the AMD decoder crashes on GetInputStatus.
    /*DWORD flags;
    if (FAILED(hr = inTransform->GetInputStatus(0, &flags)))
    return hr;*/
    return hr;
}
/// Configures the colour-conversion transform to take NV12 frames and produce
/// ARGB32 frames, both at decode_width x decode_height.
///
/// @param in_color_transform  Transform whose stream 0 input/output types are set.
/// @return The HRESULT of the first failing call, or the result of
///         SetOutputType when every step succeeds.
HRESULT configure_color_conversion(IMFTransform* in_color_transform)
{
    // Builds a video media type with the given subtype and the fixed frame size.
    const auto build_video_type = [](const GUID& subtype, CComPtr<IMFMediaType>& type) -> HRESULT
    {
        HRESULT result = MFCreateMediaType(&type);
        if (SUCCEEDED(result))
            result = type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
        if (SUCCEEDED(result))
            result = type->SetGUID(MF_MT_SUBTYPE, subtype);
        if (SUCCEEDED(result))
            result = MFSetAttributeSize(type, MF_MT_FRAME_SIZE, decode_width, decode_height);
        return result;
    };

    // Input side: NV12
    CComPtr<IMFMediaType> nv12_type;
    HRESULT hr = build_video_type(MFVideoFormat_NV12, nv12_type);
    if (SUCCEEDED(hr))
        hr = in_color_transform->SetInputType(0, nv12_type, 0);
    if (FAILED(hr))
        return hr;

    // Output side: ARGB32
    CComPtr<IMFMediaType> argb_type;
    hr = build_video_type(MFVideoFormat_ARGB32, argb_type);
    if (SUCCEEDED(hr))
        hr = in_color_transform->SetOutputType(0, argb_type, 0);
    return hr;
}
/// Runs one texture through the colour-conversion transform and returns the
/// converted sample.
///
/// The texture is first copied, because the one produced by desktop duplication
/// can be invalidated at any time; the copy is what gets wrapped in a DXGI
/// surface buffer and fed to the transform.
///
/// @param in_transform  Configured colour-conversion transform (stream 0 is used).
/// @param in_texture    Source texture; only read.
/// @param p_sample_out  Receives the output sample on success (set to nullptr
///                      on failure). The caller owns the returned reference and
///                      must Release() it.
/// @return S_OK on success, E_POINTER for a null argument, otherwise the
///         HRESULT of the first failing call.
HRESULT color_convert(IMFTransform* in_transform, ID3D11Texture2D* in_texture, IMFSample** p_sample_out)
{
    if (in_transform == nullptr || in_texture == nullptr || p_sample_out == nullptr)
        return E_POINTER;
    *p_sample_out = nullptr;

    HRESULT hr = S_OK;

    // Copy texture, since the one that desktop duplication generates can die at random
    CD3D11_TEXTURE2D_DESC desc;
    in_texture->GetDesc(&desc);
    CComPtr<ID3D11Device> device;
    in_texture->GetDevice(&device);
    const CD3D11_TEXTURE2D_DESC copy_desc(desc.Format, desc.Width, desc.Height, 1, 1,
                                          D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET);
    CComPtr<ID3D11Texture2D> copy_texture;
    // A failed allocation must not fall through to CopyResource with a null destination.
    if (FAILED(hr = device->CreateTexture2D(&copy_desc, nullptr, &copy_texture)))
        return hr;
    CComPtr<ID3D11DeviceContext> device_context;
    device->GetImmediateContext(&device_context);
    device_context->CopyResource(copy_texture, in_texture);

    // Create buffer around the private copy
    CComPtr<IMFMediaBuffer> input_buffer;
    if (FAILED(hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), copy_texture, 0, false, &input_buffer)))
        return hr;

    // Create sample
    CComPtr<IMFSample> input_sample;
    if (FAILED(hr = MFCreateSample(&input_sample)))
        return hr;
    if (FAILED(hr = input_sample->AddBuffer(input_buffer)))
        return hr;

    // Set input sample times
    if (FAILED(hr = input_sample->SetSampleTime(100)))
        return hr;
    if (FAILED(hr = input_sample->SetSampleDuration(1000)))
        return hr;

    // Process input
    if (FAILED(hr = in_transform->ProcessInput(0, input_sample, 0)))
        return hr;

    // Process output: this path passes no sample of its own, so the transform
    // is expected to provide one.
    MFT_OUTPUT_STREAM_INFO mft_stream_info;
    ZeroMemory(&mft_stream_info, sizeof(MFT_OUTPUT_STREAM_INFO));
    if (FAILED(hr = in_transform->GetOutputStreamInfo(0, &mft_stream_info)))
        return hr;
    ATLASSERT(mft_stream_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES);

    DWORD status = 0;
    MFT_OUTPUT_DATA_BUFFER output_buffer;
    output_buffer.pSample = nullptr;
    output_buffer.pEvents = nullptr;
    output_buffer.dwStreamID = 0;
    output_buffer.dwStatus = 0;
    hr = in_transform->ProcessOutput(0, 1, &output_buffer, &status);

    // The transform may hand back an event collection; the caller has to release it.
    if (output_buffer.pEvents != nullptr)
    {
        output_buffer.pEvents->Release();
        output_buffer.pEvents = nullptr;
    }

    if (FAILED(hr))
    {
        // Do not leak a sample the transform may have attached before failing.
        if (output_buffer.pSample != nullptr)
            output_buffer.pSample->Release();
        return hr;
    }

    // Ownership of the sample reference moves to the caller.
    *p_sample_out = output_buffer.pSample;

    // Test output to file
    /*IMFMediaBuffer* buffer;
    if (FAILED(hr = outputBuffer.pSample->ConvertToContiguousBuffer(&buffer)))
    return hr;
    unsigned char* data;
    DWORD length;
    if (FAILED(hr = buffer->GetCurrentLength(&length)))
    return hr;
    if (FAILED(hr = buffer->Lock(&data, nullptr, &length)))
    return hr;
    std::ofstream fout;
    fout.open("raw.nv12", std::ios::binary | std::ios::out);
    fout.write((char*)data, length);
    fout.close();
    if (FAILED(hr = buffer->Unlock()))
    return hr;*/
    // End test output
    return hr;
}
int main()
{
HRESULT hr;
if (FAILED(hr = init_mf()))
return hr;
// Initialize DXGI
CComPtr<ID3D11Device> device;
CComPtr<ID3D11DeviceContext> context;
if (FAILED(hr = init_dxgi(device, context)))
return hr;
// Create device manager
CComPtr<IMFDXGIDeviceManager> device_manager;
UINT resetToken;
if (FAILED(hr = MFCreateDXGIDeviceManager(&resetToken, &device_manager)))
return hr;
// https://docs.microsoft.com/en-us/windows/win32/api/dxva2api/nf-dxva2api-idirect3ddevicemanager9-resetdevice
// When you first create the Direct3D device manager, call this method with a pointer to the Direct3D device.
if (FAILED(hr = device_manager->ResetDevice(device, resetToken)))
return hr;
// Get decoder
CComPtr<IMFTransform> decoder_transform;
CComPtr<IMFActivate> decoder_activate;
CComPtr<IMFAttributes> decoder_attributes;
CComQIPtr<IMFMediaEventGenerator> decoder_event_generator;
if (FAILED(hr = get_decoder(decoder_transform, decoder_activate, decoder_attributes)))
return hr;
// Get the name of the decoder
CComHeapPtr<wchar_t> friendly_name;
uint32_t friendly_name_length;
if (FAILED(hr = decoder_activate->GetAllocatedString(MFT_FRIENDLY_NAME_Attribute, &friendly_name, &friendly_name_length)))
return hr;
std::wcout << "- Selected decoder: " << static_cast<wchar_t const*>(friendly_name) << std::endl;
// Enable hardware acceleration
if (FAILED(hr = decoder_attributes->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, true)))
return hr;
// Enable low-latency mode - otherwise the decoder will require many input frames before it's able produce any output
if (FAILED(hr = decoder_attributes->SetUINT32(CODECAPI_AVLowLatencyMode, true)))
return hr;
// Get decoder stream IDs
DWORD input_stream_id, output_stream_id;
hr = decoder_transform->GetStreamIDs(1, &input_stream_id, 1, &output_stream_id);
if (hr == E_NOTIMPL) // Doesn't mean failed, see remarks
{ // https://docs.microsoft.com/en-us/windows/win32/api/mftransform/nf-mftransform-imftransform-getstreamids
input_stream_id = 0;
output_stream_id = 0;
hr = S_OK;
}
if (FAILED(hr))
return hr;
// Init decoder-related objects/variables
if (FAILED(hr = configure_decoder(decoder_transform, device_manager, input_stream_id, output_stream_id)))
return hr;
// Apparently you can do this, idek man
decoder_event_generator = decoder_transform;
// Init color conversion-related objects/variables
IMFTransform* color_transform;
if (FAILED(hr = CoCreateInstance(CLSID_VideoProcessorMFT, nullptr, CLSCTX_INPROC_SERVER,
IID_IMFTransform, (void**)&color_transform)))
return hr;
if (FAILED(hr = color_transform->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(device_manager.p))))
return hr;
if (FAILED(hr = configure_color_conversion(color_transform)))
return hr;
if (FAILED(hr = decoder_transform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL)))
return hr;
if (FAILED(hr = decoder_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL)))
return hr;
if (FAILED(hr = decoder_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL)))
return hr;
int frameIndex = 1;
do
{
// Read frame data from .h264 file
std::ifstream fin("..\\CaptureAndEncode\\Encoded Frames\\frame" + std::to_string(frameIndex) + ".h264", std::ios::binary |
std::ios::in);
if (!fin)
throw std::runtime_error("Invalid file path specified");
// Get file length
fin.seekg(0, std::ios::end);
size_t length = fin.tellg();
fin.seekg(0, std::ios::beg);
// Variables
CComPtr<IMFMediaBuffer> input_buffer;
CComPtr<IMFSample> input_sample;
DWORD cbMaxLength, cbCurrentLength;
BYTE* pBuffer;
// Create memory buffer and make the underlying array readable/writeable by using Lock()
if (FAILED(hr = MFCreateMemoryBuffer(length, &input_buffer)))
return hr;
if (FAILED(hr = input_buffer->Lock(&pBuffer, &cbMaxLength, &cbCurrentLength)))
return hr;
// Copy frame data from file to array;
fin.read((char*)pBuffer, length);
fin.close();
// Unlock it again (no longer readable/writeable)
if (FAILED(hr = input_buffer->Unlock()))
return hr;
if (FAILED(hr = input_buffer->SetCurrentLength(length)))
return hr;
// Create sample and add the buffer to it
if (FAILED(hr = MFCreateSample(&input_sample)))
return hr;
if (FAILED(hr = input_sample->AddBuffer(input_buffer)))
return hr;
// Process input
hr = decoder_transform->ProcessInput(0, input_sample, 0);
if (SUCCEEDED(hr))
std::cout << "decoder_transform::ProcessInput() - SUCCESS" << std::endl;
else if (hr == MF_E_NOTACCEPTING)
std::cout << "decoder_transform::ProcessInput() - MF_E_NOTACCEPTING" << std::endl;
else
std::cout << "decoder_transform::ProcessInput() - ERROR" << std::endl;
// Output H264 -> NV12
DWORD status;
MFT_OUTPUT_DATA_BUFFER output_buffer;
output_buffer.pSample = nullptr;
output_buffer.pEvents = nullptr;
output_buffer.dwStreamID = 0;
output_buffer.dwStatus = 0;
MFT_OUTPUT_STREAM_INFO mft_stream_info;
ZeroMemory(&mft_stream_info, sizeof(MFT_OUTPUT_STREAM_INFO));
if (FAILED(hr = decoder_transform->GetOutputStreamInfo(0, &mft_stream_info)))
return hr;
if ((mft_stream_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) == 0 &&
(mft_stream_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES) == 0)
{
std::cout << "This decoder requires that we allocate the output sample ourselves" << std::endl;
CComPtr<IMFSample> output_sample;
CComPtr<IMFMediaBuffer> output_media_buffer;
if (FAILED(hr = MFCreateSample(&output_sample)))
return hr;
if (FAILED(hr = MFCreateMemoryBuffer(mft_stream_info.cbSize, &output_media_buffer)))
return hr;
if (FAILED(hr = output_sample->AddBuffer(output_media_buffer)))
return hr;
output_buffer.pSample = output_sample.Detach();
}
hr = decoder_transform->ProcessOutput(0, 1, &output_buffer, &status);
if (SUCCEEDED(hr))
{
std::cout << "decoder_transform::ProcessOutput() - SUCCESS" << std::endl;
// Test output to file
IMFMediaBuffer* buffer;
if (FAILED(hr = output_buffer.pSample->ConvertToContiguousBuffer(&buffer)))
return hr;
unsigned char* data;
DWORD length;
if (FAILED(hr = buffer->GetCurrentLength(&length)))
return hr;
if (FAILED(hr = buffer->Lock(&data, nullptr, &length)))
return hr;
std::ofstream fout;
fout.open("raw.nv12", std::ios::binary | std::ios::out);
fout.write((char*)data, length);
fout.close();
if (FAILED(hr = buffer->Unlock()))
return hr;
// End test output
}
else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
std::cout << "decoder_transform::ProcessOutput() - MF_E_TRANSFORM_NEED_MORE_INPUT" << std::endl;
else if (hr == E_INVALIDARG)
std::cout << "decoder_transform::ProcessOutput() - E_INVALIDARG" << std::endl;
else if (hr == MF_E_TRANSFORM_STREAM_CHANGE)
{
std::cout << "decoder_transform::ProcessOutput() - MF_E_TRANSFORM_STREAM_CHANGE" << std::endl;
CComPtr<IMFMediaType> output_type;
if (FAILED(hr = decoder_transform->GetOutputAvailableType(0, 0, &output_type)))
return hr;
// Create output type
/*if (FAILED(hr = MFCreateMediaType(&output_type)))
return hr;*/
// Reconfigure output settings
if (FAILED(hr = output_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)))
return hr;
if (FAILED(hr = output_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12)))
return hr;
if (FAILED(hr = MFSetAttributeRatio(output_type, MF_MT_PIXEL_ASPECT_RATIO, 1, 1)))
return hr;
// Set output type
if (FAILED(hr = decoder_transform->SetOutputType(output_stream_id, output_type, 0)))
return hr;
}
frameIndex++;
input_sample.Release();
} while (frameIndex <= 60);
return hr;
}
You just need to follow the steps described in "Handling Stream Changes":
1. The client calls IMFTransform::GetOutputAvailableType. This method returns an updated set of output types.
2. The client calls SetOutputType to set a new output type.
3. The client resumes calling ProcessInput/ProcessOutput.
In the question body above you are trying to do step 3 without doing step 2. Most likely your media type differs somewhat from the one the MFT proposes, so the MFT rejects it and blocks processing until this is resolved.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.