繁体   English   中英

Rust 计算管道的 Vulkano 创建不起作用并冻结我的视频卡

[英]Rust Vulkano creation of compute pipeline does not work and freeze my video card

我正在尝试将着色器加载到 amd 视频卡中。 创建所有缓冲区后,我尝试创建一个新的计算管道。 当我开始调试它打印消息时,我发现“完成创建计算管道”从未被打印出来。 当我用 `cargo run --release` 运行它时,它会打印:“Creating pipeline with shader”,但几秒钟后它冻结了我的整个计算机,我必须将其关闭并重新打开......

我的 Vulkano 版本是:0.32.1; 我的 vulkano-shaders 版本是:0.32.1; 我的显卡是:AMD RX570 4GB

Vulkano 物理设备属性:

buffer_image_granularity: 64,
compute_units_per_shader_array: Some(
    8,
),
conformance_version: Some(
    1.2.0,
),

Cargo.toml:

[package]
name = "vulkano_matrix"
version = "0.1.0"
edition = "2021"

[dependencies]
# NOTE(review): the question text claims vulkano-shaders 0.32.1, but 0.32.0 is
# pinned here — keep vulkano and vulkano-shaders in lockstep to avoid subtle
# reflection/interface mismatches between the generated shader module and the
# runtime crate.
vulkano = "0.32.1"
vulkano-shaders = "0.32.0"
rand = "0.8.4"
nalgebra="0.31.4"
colored = "2.0.0"
bytemuck = "1.12.3"

// main.rs
extern crate nalgebra as na;
use bytemuck::{Pod, Zeroable};
use colored::Colorize;
use na::{dmatrix, DMatrix};
use std::{
    io::{stdin, stdout, Write},
    sync::Arc,
    time::Instant,
};
use vulkano::{
    buffer::{BufferUsage, CpuAccessibleBuffer, DeviceLocalBuffer},
    command_buffer::{
        allocator::{CommandBufferAllocator, StandardCommandBufferAllocator},
        AutoCommandBufferBuilder, PrimaryAutoCommandBuffer, PrimaryCommandBufferAbstract,
    },
    descriptor_set::{
        allocator::StandardDescriptorSetAllocator, PersistentDescriptorSet, WriteDescriptorSet,
    },
    device::{
        physical::PhysicalDevice, Device, DeviceCreateInfo, DeviceExtensions, Features,
        QueueCreateInfo, QueueFlags,
    },
    instance::{Instance, InstanceCreateInfo},
    memory::allocator::{MemoryAllocator, StandardMemoryAllocator},
    pipeline::Pipeline,
    pipeline::{ComputePipeline, PipelineBindPoint},
    sync::GpuFuture,
    VulkanLibrary,
};

/// How the input matrix is padded before the convolution runs.
#[derive(Clone, Copy)]
pub enum Padding {
    /// No padding at all: the output shrinks relative to the input.
    None,
    /// Explicit (row, column) padding, in elements.
    Fixed(usize, usize),
    /// Padding computed so that (at stride 1) the output has the same shape
    /// as the input ("same" padding in the usual convolution sense).
    Same,
}

/// Extents of one matrix (rows x columns x channels), uploaded to the GPU as
/// part of the `Dimensions` uniform/storage buffer.
///
/// NOTE(review): these fields are `usize` (8 bytes on a 64-bit host) while the
/// GLSL counterpart declares them as `uint` (4 bytes). With `#[repr(C)]` the
/// CPU writes a different byte layout than the shader reads — these should
/// almost certainly be `u32`. TODO confirm against the shader's `Dimension`.
#[repr(C)]
#[derive(Default, Copy, Clone, Debug, Zeroable, Pod)]
struct Dimension {
    pub rows: usize,
    pub columns: usize,
    pub channels: usize,
}

impl Dimension {
    /// Builds a `Dimension` from a 2-D matrix, treating it as single-channel.
    pub fn from_matrix<T>(mat: &DMatrix<T>) -> Self {
        let (rows, columns) = mat.shape();
        Self {
            rows,
            columns,
            channels: 1,
        }
    }
}

/// Aggregate of the three matrix shapes the kernel needs; mirrors the GLSL
/// `Dimensions` buffer block (binding 0) field-for-field.
///
/// NOTE(review): inherits the `usize`-vs-`uint` layout mismatch from
/// `Dimension` — see the note on that struct.
#[repr(C)]
#[derive(Default, Copy, Clone, Debug, Zeroable, Pod)]
struct BufferDimensions {
    pub input_matrix: Dimension,
    pub kernel: Dimension,
    pub output_matrix: Dimension,
}

/// Stride/padding parameters; mirrors the GLSL `Options` buffer block
/// (binding 4: `ivec2 padding; uint stride;`).
#[repr(C)]
#[derive(Default, Copy, Clone, Debug, Zeroable, Pod)]
struct ConvolutionOptions {
    /// (row, column) padding, matching the shader's `ivec2 padding`.
    pub padding: [i32; 2],
    /// Convolution stride, matching the shader's `uint stride`.
    pub stride: u32,
}

/// Prints `question` (bold cyan) as a prompt and returns the raw line read
/// from stdin — note the trailing newline is NOT stripped; callers strip it.
fn input(question: impl Into<String>) -> String {
    print!("{} ", question.into().bold().cyan());
    stdout().flush().expect("Could not flush stdout");
    let mut answer = String::new();
    stdin()
        .read_line(&mut answer)
        .expect("Could not read stdin");
    answer
}

fn main() {
    // --- Vulkan bootstrap: library, instance, device selection -----------
    let library = VulkanLibrary::new().expect("Could not find vulkan.dll");
    let instance =
        Instance::new(library, InstanceCreateInfo::default()).expect("Failed to Create Instance");

    println!("Available GPUs:");
    // Enumerate every physical device, printing an indexed one-line summary
    // so the user can pick one interactively below.
    let physical_devices = instance
        .enumerate_physical_devices()
        .expect("Could not enumerate the physical devices")
        .enumerate()
        .map(|(i, physical)| {
            println!(
                "[{}]: \"{}\"; TYPE: \"{:?}\"; API_VERSION: \"{}\"",
                i.to_string().bold().bright_magenta(),
                physical.properties().device_name.to_string().bold().green(),
                physical.properties().device_type,
                physical.api_version()
            );
            physical
        })
        .collect::<Vec<Arc<PhysicalDevice>>>();

    // Ask the user which device index to use (panics on non-numeric input;
    // also panics below if the index is out of range).
    let physical_index = input(format!("Type the chosen [{}]:", "index".bright_magenta()))
        .replace("\n", "")
        .parse::<usize>()
        .expect("Please type a number.");
    let physical = physical_devices[physical_index].clone();
    println!(
        "Using {}; TYPE: \"{:?}\"; \n\n {:?} \n\n {:#?}",
        physical.properties().device_name.to_string().bold().green(),
        physical.properties().device_type,
        physical.api_version(),
        physical.properties()
    );
    // NOTE(review): this early return makes EVERYTHING below unreachable —
    // presumably a debugging artifact left in while inspecting device
    // properties. Remove it to actually create the device and pipeline.
    return;

    // Pick the first queue family that supports compute work.
    let queue_family_index = physical
        .queue_family_properties()
        .iter()
        .position(|q| {
            q.queue_flags.intersects(&QueueFlags {
                compute: true,
                ..QueueFlags::empty()
            })
        })
        .unwrap() as u32;

    // Create the logical device with no optional features and a single queue
    // from the compute-capable family chosen above.
    let (device, mut queues) = Device::new(
        physical,
        DeviceCreateInfo {
            enabled_features: Features::empty(),
            queue_create_infos: vec![QueueCreateInfo {
                queue_family_index,
                ..Default::default()
            }],
            ..Default::default()
        },
    )
    .expect("Failed to create device");
    let queue = queues.next().unwrap();

    // Allocators for memory, descriptor sets, and command buffers.
    let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
    let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone());
    let command_buffer_allocator =
        StandardCommandBufferAllocator::new(device.clone(), Default::default());

    // One-shot primary command buffer; the DeviceLocalBuffer::from_* calls
    // below record their staging copies into this builder.
    let mut builder = AutoCommandBufferBuilder::primary(
        &command_buffer_allocator,
        queue.queue_family_index(),
        vulkano::command_buffer::CommandBufferUsage::OneTimeSubmit,
    )
    .unwrap();

    // --- Convolution geometry --------------------------------------------
    let stride = 1;
    // Standard conv output size: (in + 2*pad - kernel) / stride + 1.
    let get_result_shape = |input_shape: usize, padding: usize, ker_shape: usize| {
        (input_shape + 2 * padding - ker_shape) / stride + 1
    };
    let padding = Padding::Same;

    let input_data = dmatrix![1.0f32, 2., 3.; 4., 5., 6.; 7., 8., 9.];
    let kernel_data = dmatrix![11.0f32, 19.; 31., 55.];
    let input_shape = Dimension::from_matrix(&input_data);
    let kernel_shape = Dimension::from_matrix(&kernel_data);

    // Resolve the Padding enum into concrete (row, column) padding amounts.
    let padding = match padding {
        Padding::None => (0, 0),
        Padding::Fixed(x_p, y_p) => (x_p, y_p),
        Padding::Same => {
            // "Same" padding formula; with stride == 1 this reduces to
            // ceil((kernel - 1) / 2).
            let get_padding = |input_shape: usize, ker_shape: usize| {
                (((stride - 1) as i64 * input_shape as i64 - stride as i64 + ker_shape as i64)
                    as f64
                    / 2.0)
                    .ceil() as usize
            };
            (
                /* rows */
                get_padding(input_shape.rows, kernel_shape.rows),
                /* columns */
                get_padding(input_shape.columns, kernel_shape.columns),
            )
        }
    };

    // Shapes and options that will be uploaded for the shader to read.
    let dimensions = BufferDimensions {
        input_matrix: input_shape,
        kernel: kernel_shape,
        output_matrix: Dimension {
            rows: get_result_shape(input_shape.rows, padding.0, kernel_shape.rows),
            columns: get_result_shape(input_shape.columns, padding.1, kernel_shape.columns),
            channels: 1,
        },
    };
    let options = ConvolutionOptions {
        padding: [padding.0 as i32, padding.1 as i32],
        stride: stride as u32,
    };

    // --- GPU buffers ------------------------------------------------------
    // Device-local uniform buffers; the upload is recorded into `builder`.
    let dimensions_buffer = DeviceLocalBuffer::from_data(
        &memory_allocator,
        dimensions,
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");
    let options_buffer = DeviceLocalBuffer::from_data(
        &memory_allocator,
        options,
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");

    println!(
        "{:?} {:?} {:?} {:?}",
        input_data, dimensions, options, kernel_data
    );

    // NOTE(review): the shader declares bindings 1-3 as `buffer` (storage)
    // blocks, but these are created with uniform_buffer usage — the usage
    // flags and the shader's descriptor types look inconsistent; verify.
    let input_buffer = DeviceLocalBuffer::from_iter(
        &memory_allocator,
        input_data.data.as_vec().to_owned(),
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");
    let kernel_buffer = DeviceLocalBuffer::from_iter(
        &memory_allocator,
        kernel_data.data.as_vec().to_owned(),
        BufferUsage {
            uniform_buffer: true,
            ..BufferUsage::empty()
        },
        &mut builder,
    )
    .expect("Failed to create uniform buffer.");
    // NOTE(review): `[0..n]` is a ONE-ELEMENT array holding a `Range`, so
    // `.map(|__| 0.0f32)` yields `[0.0f32; 1]` — this buffer gets a single
    // element, not rows*columns*channels of them. The intent was almost
    // certainly the iterator form `(0..n).map(|_| 0.0f32)`.
    let output_buffer = CpuAccessibleBuffer::from_iter(
        &memory_allocator,
        BufferUsage {
            storage_buffer: true,
            ..BufferUsage::empty()
        },
        false,
        [0..(dimensions.output_matrix.channels
            * dimensions.output_matrix.rows
            * dimensions.output_matrix.columns)]
        .map(|__| 0.0f32)
        .to_owned(),
    )
    .expect("Failed to create storage buffer.");

    println!("Loading shader");

// Load the SPIR-V module generated by the `cs` module's shader! macro.
let cs = cs::load(device.clone()).unwrap();

// NOTE(review): per the accepted answer, the hang below is caused by the
// shader's workgroup size (32*32*16 = 16384 invocations) exceeding the
// device's max_compute_work_group_invocations limit — fix the shader's
// local_size, not this call.
println!("Creating pipeline with shader"); // This line prints just fine 
let compute_pipeline = ComputePipeline::new(
    device.clone(),
    cs.entry_point("main").unwrap(),
    &(),
    None,
    |_| {},
)
.expect("Failed to create compute shader");
println!("Finished Creating the compute pipeline"); // THIS LINE NEVER GETS RUN

}


/// Compute-shader module: `vulkano_shaders::shader!` compiles the GLSL file
/// to SPIR-V at build time and generates a `load(device)` function plus
/// reflection types for the shader's descriptor layout.
pub mod cs {
    use vulkano_shaders::shader;

    shader! {
        ty: "compute",
        path: "./matrix_convolution.glsl"
    }
}

着色器是:

#version 450
#pragma shader_stage(compute)

// FIX: the original local size was 32 x 32 x 16 = 16384 invocations per
// workgroup, which exceeds maxComputeWorkGroupInvocations on virtually every
// GPU (the Vulkan spec only guarantees 128). Exceeding that limit is what
// hung pipeline creation and froze the device. 8 x 8 x 2 = 128 fits the
// spec-guaranteed minimum on every conformant implementation; raise it only
// after checking physical.properties().max_compute_work_group_invocations.
layout(local_size_x=8, local_size_y=8, local_size_z=2) in;

// Mirrors the Rust `Dimension` struct.
// NOTE(review): the Rust side declares these fields as `usize` (8 bytes on a
// 64-bit host) while `uint` is 4 bytes — the CPU and GPU layouts disagree;
// the Rust struct should use u32. TODO confirm and fix on the host side.
struct Dimension {
  uint rows;
  uint columns;
  uint channels;
};

// binding 0: shapes of the three matrices involved in the convolution.
layout(set=0, binding=0) buffer Dimensions {
  Dimension input_matrix;
  Dimension kernel;
  Dimension output_matrix;
} dims_buf;

// binding 1: flattened input matrix (read-only).
layout(set=0, binding=1) buffer readonly InputMatrix {
  float[] input_matrix;
};

// binding 2: flattened convolution kernel (read-only).
layout(set=0, binding=2) buffer readonly Kernel {
  float[] kernel;
};

// binding 3: flattened result matrix (write-only).
layout(set=0, binding=3) buffer writeonly OutputMatrix {
   float[] output_matrix;
};
// binding 4: stride/padding options (mirrors Rust ConvolutionOptions).
layout(set=0, binding=4) buffer Options {
   ivec2 padding;
   uint stride;
} options_buf;

void main() {
  // One invocation per output element; the convolution itself is not yet
  // implemented — only the global coordinates are computed.
  const uint raw_row = gl_GlobalInvocationID.x;
  const uint raw_column = gl_GlobalInvocationID.y;
  const uint raw_channel = gl_GlobalInvocationID.z;
}

我试着用不同的着色器运行类似的程序,但效果很好。

事实证明,工作组的大小不能超过 physical.properties().max_compute_work_group_invocations。

因此:local_size_x * local_size_y * local_size_z 必须小于等于 max_compute_work_group_invocations。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM