Linux DMA：使用DMAengine進行分散 - 收集事務

Question

我嘗試在自定義內核驅動程序中使用DMAengine API來執行分散 - 收集操作。 我有一個連續的存儲區域作為數據源，想通過scatterlist（分散列表）結構將其數據復制到多個分散的緩沖區中。 DMA控制器是支持DMAengine API的PL330（參見PL330 DMA控制器）。

我的測試代碼如下：

在我的驅動程序頭文件（ test_driver.h ）中：

#ifndef __TEST_DRIVER_H__
#define __TEST_DRIVER_H__

#include <linux/platform_device.h>
#include <linux/device.h>

#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/of_dma.h>

/* Number of scatter-gather entries (and destination buffers) set up by dma_sg_init(). */
#define SG_ENTRIES 3
/* Size in bytes of each destination buffer allocated by dma_sg_init(). */
#define BUF_SIZE 16
/* Bus address programmed as the DMA source (dma_config.src_addr) in _dma_start(). */
#define DEV_BUF 0x10000000

/* One DMA destination buffer: a kmalloc'ed region and its length. */
struct dma_block {
    /* Buffer memory; allocated in dma_sg_init() and freed in dma_sg_exit(). */
    void * data;
    /* Buffer length in bytes; dma_sg_init() sets it to BUF_SIZE. */
    int size;
};

/* Per-device DMA state shared by the functions in dma_functions.c. */
struct dma_private_info {

    /* Scatter-gather table with one entry per destination block. */
    struct sg_table sgt;

    /* Array of nblocks destination buffers described by sgt. */
    struct dma_block * blocks;
    int nblocks;

    /* Set to 1 by _dma_start() once the transfer is issued; cleared by dma_stop(). */
    int dma_started;

    /* Channel obtained in dma_init() and released in dma_exit(). */
    struct dma_chan * dma_chan;
    /* Slave configuration filled in by _dma_start(). */
    struct dma_slave_config dma_config;
    /* Descriptor returned by dmaengine_prep_slave_sg(). */
    struct dma_async_tx_descriptor * dma_desc;
    /* Cookie returned by dmaengine_submit(); used to poll the transfer status. */
    dma_cookie_t cookie;
};

/* Driver instance: the bound platform device plus its DMA state. */
struct test_platform_device {
    /* Platform device this instance is bound to; assigned in test_probe(). */
    struct platform_device * pdev;

    /* DMA bookkeeping for this device. */
    struct dma_private_info dma_priv;
};

/* Yields the struct device * embedded in tdev's platform device. */
#define _get_devp(tdev) (&((tdev)->pdev->dev))
/* Yields a pointer to tdev's struct dma_private_info. */
#define _get_dmapip(tdev) (&((tdev)->dma_priv))

int dma_stop(struct test_platform_device * tdev);
int dma_start(struct test_platform_device * tdev);
int dma_start_block(struct test_platform_device * tdev);
int dma_init(struct test_platform_device * tdev);
int dma_exit(struct test_platform_device * tdev);

#endif

在我的源代碼中包含dma函數（ dma_functions.c ）：

#include <linux/slab.h>

#include "test_driver.h"

/* Physical base address and length in bytes of the RAM window that dma_sg_check() ioremaps. */
#define BARE_RAM_BASE 0x10000000
#define BARE_RAM_SIZE 0x10000000

struct ram_bare {
    uint32_t * __iomem map;

    uint32_t base;
    uint32_t size;
};

static void dma_sg_check(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    uint32_t * buf;
    unsigned int bufsize;
    int nwords;
    int nbytes_word = sizeof(uint32_t);
    int nblocks;
    struct ram_bare ramb;
    uint32_t * p;
    int i;
    int j;

    ramb.map = ioremap(BARE_RAM_BASE,BARE_RAM_SIZE);
    ramb.base = BARE_RAM_BASE;
    ramb.size = BARE_RAM_SIZE;

    dev_info(dev,"nblocks: %d \n",dma_priv->nblocks);

    p = ramb.map;

    nblocks = dma_priv->nblocks;

    for( i = 0 ; i < nblocks ; i++ ) {

        buf = (uint32_t *) dma_priv->blocks[i].data;
        bufsize = dma_priv->blocks[i].size;
        nwords = dma_priv->blocks[i].size/nbytes_word;

        dev_info(dev,"block[%d],size %d: ",i,bufsize);

        for ( j = 0 ; j <  nwords; j++, p++) {
            dev_info(dev,"DMA: 0x%x, RAM: 0x%x",buf[j],ioread32(p));
        }
    }

    iounmap(ramb.map);
}

/*
 * dma_sg_exit() - release everything dma_sg_init() allocated.
 * @tdev: driver instance owning the buffers and the sg table.
 *
 * Frees each block's data buffer, then the block array, then the sg table.
 * Always returns 0.
 */
static int dma_sg_exit(struct test_platform_device * tdev)
{
    struct dma_private_info * priv = _get_dmapip(tdev);
    int idx = 0;

    while (idx < priv->nblocks) {
        kfree(priv->blocks[idx].data);
        idx++;
    }

    kfree(priv->blocks);
    sg_free_table(&priv->sgt);

    return 0;
}

int dma_stop(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    int ret = 0;

    dma_unmap_sg(dev,dma_priv->sgt.sgl,\
        dma_priv->sgt.nents, DMA_FROM_DEVICE);

    dma_sg_exit(tdev);

    dma_priv->dma_started = 0;

    return ret;
}

static void dma_callback(void * param)
{
    enum dma_status dma_stat;
    struct test_platform_device * tdev = (struct test_platform_device *) param;
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);

    dev_info(dev,"Checking the DMA state....\n");

    dma_stat = dma_async_is_tx_complete(dma_priv->dma_chan,\
        dma_priv->cookie, NULL, NULL);

    if(dma_stat == DMA_COMPLETE) {
        dev_info(dev,"DMA complete! \n");
        dma_sg_check(tdev);
        dma_stop(tdev);
    } else if (unlikely(dma_stat == DMA_ERROR)) {
        dev_info(dev,"DMA error! \n");
        dma_stop(tdev);
    }
}

/*
 * dma_busy_loop() - poll the transfer status until it reaches a final state.
 * @tdev: driver instance whose dma_chan/cookie identify the transfer.
 *
 * Logs every status transition (a status is only logged when it differs from
 * the previously seen one). The loop ends on DMA_COMPLETE and also on
 * DMA_ERROR: a failed transfer never becomes complete, so waiting only for
 * DMA_COMPLETE would spin forever.
 *
 * NOTE(review): there is still no timeout; a transfer that stays
 * IN PROGRESS keeps this loop (and its caller) spinning.
 */
static void dma_busy_loop(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);

    enum dma_status status;
    int status_change = -1;     /* last status logged; -1 means none/unknown */

    do {
        status = dma_async_is_tx_complete(dma_priv->dma_chan, dma_priv->cookie, NULL, NULL);

        switch(status) {
        case DMA_COMPLETE:
            if(status_change != 0)
                dev_info(dev,"DMA status: COMPLETE\n");
            status_change = 0;
            break;
        case DMA_PAUSED:
            if (status_change != 1)
                dev_info(dev,"DMA status: PAUSED\n");
            status_change = 1;
            break;
        case DMA_IN_PROGRESS:
            if(status_change != 2)
                dev_info(dev,"DMA status: IN PROGRESS\n");
            status_change = 2;
            break;
        case DMA_ERROR:
            if (status_change != 3)
                dev_info(dev,"DMA status: ERROR\n");
            status_change = 3;
            break;
        default:
            dev_info(dev,"DMA status: UNKNOWN\n");
            status_change = -1;
            break;
        }
    } while(status != DMA_COMPLETE && status != DMA_ERROR);

    if(status == DMA_ERROR)
        dev_err(dev,"DMA transaction failed! \n");
    else
        dev_info(dev,"DMA transaction completed! \n");
}

/*
 * dma_sg_init() - allocate the destination buffers and the sg table.
 * @tdev: driver instance to populate.
 *
 * Allocates an sg table with SG_ENTRIES entries, an array of SG_ENTRIES
 * dma_block descriptors, and one BUF_SIZE-byte buffer per block, then points
 * each sg entry at its buffer.
 *
 * Returns 0 on success or a negative errno. On failure everything allocated
 * so far is released and blocks/nblocks are reset, so a later dma_sg_exit()
 * cannot free the same memory twice.
 */
static int dma_sg_init(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct scatterlist *sg;
    int ret;
    int i;

    ret = sg_alloc_table(&(dma_priv->sgt), SG_ENTRIES, GFP_ATOMIC);
    if(ret)
        goto out_free_table;

    dma_priv->nblocks = SG_ENTRIES;
    /* Zeroed array: unallocated data pointers stay NULL. */
    dma_priv->blocks = kcalloc(dma_priv->nblocks,
        sizeof(*dma_priv->blocks), GFP_ATOMIC);
    if(dma_priv->blocks == NULL) {
        ret = -ENOMEM;
        goto out_free_table;
    }

    for( i = 0 ; i < dma_priv->nblocks ; i++ ) {
        dma_priv->blocks[i].size = BUF_SIZE;
        dma_priv->blocks[i].data = kmalloc(dma_priv->blocks[i].size, GFP_ATOMIC);
        if(dma_priv->blocks[i].data == NULL) {
            ret = -ENOMEM;
            goto out_free_blocks;
        }
    }

    for_each_sg(dma_priv->sgt.sgl, sg, dma_priv->sgt.nents, i)
        sg_set_buf(sg,dma_priv->blocks[i].data,dma_priv->blocks[i].size);

    return 0;

out_free_blocks:
    /* Free only the buffers allocated before the failing index. */
    while(--i >= 0)
        kfree(dma_priv->blocks[i].data);

    kfree(dma_priv->blocks);
    dma_priv->blocks = NULL;

out_free_table:
    dma_priv->nblocks = 0;
    sg_free_table(&(dma_priv->sgt));

    return ret;
}

static int _dma_start(struct test_platform_device * tdev,int block)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    int ret = 0;
    int sglen;

    /* Step 1: Allocate and initialize the SG list */
    dma_sg_init(tdev);

    /* Step 2: Map the SG list */
    sglen = dma_map_sg(dev,dma_priv->sgt.sgl,\
        dma_priv->sgt.nents, DMA_FROM_DEVICE);
    if(! sglen)
        goto out2;

    /* Step 3: Configure the DMA */
    (dma_priv->dma_config).direction = DMA_DEV_TO_MEM;
    (dma_priv->dma_config).src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    (dma_priv->dma_config).src_maxburst = 1;
    (dma_priv->dma_config).src_addr = (dma_addr_t) DEV_BUF;

    dmaengine_slave_config(dma_priv->dma_chan, \
        &(dma_priv->dma_config));

    /* Step 4: Prepare the SG descriptor */
    dma_priv->dma_desc = dmaengine_prep_slave_sg(dma_priv->dma_chan, \
        dma_priv->sgt.sgl, dma_priv->sgt.nents, DMA_DEV_TO_MEM, \
        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
    if (dma_priv->dma_desc == NULL) {
        dev_err(dev,"DMA could not assign a descriptor! \n");
        goto out1;
    }

    /* Step 5: Set the callback method */
    (dma_priv->dma_desc)->callback = dma_callback;
    (dma_priv->dma_desc)->callback_param = (void *) tdev;

    /* Step 6: Put the DMA descriptor in the queue */
    dma_priv->cookie = dmaengine_submit(dma_priv->dma_desc);

    /* Step 7: Fires the DMA transaction */
    dma_async_issue_pending(dma_priv->dma_chan);

    dma_priv->dma_started = 1;

    if(block)
        dma_busy_loop(tdev);

    return ret;

out1:
    dma_stop(tdev);
out2:
    ret = -1;

    return ret;
}

/* Starts the scatter-gather transfer and returns without waiting for it. */
int dma_start(struct test_platform_device * tdev)
{
    const int wait_for_completion = 0;

    return _dma_start(tdev, wait_for_completion);
}

/* Starts the scatter-gather transfer and busy-waits on its status before returning. */
int dma_start_block(struct test_platform_device * tdev)
{
    const int wait_for_completion = 1;

    return _dma_start(tdev, wait_for_completion);
}

/*
 * dma_init() - request the DMA channel named "dma_chan0" for this device.
 * @tdev: driver instance; tdev->pdev must already be set.
 *
 * Stores the channel (or NULL) in dma_priv->dma_chan and resets dma_started
 * in either case. Returns 0 on success, -1 if no channel was obtained.
 */
int dma_init(struct test_platform_device * tdev)
{
    struct dma_private_info * priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    struct dma_chan * chan;
    int status = 0;

    chan = dma_request_slave_channel(dev, "dma_chan0");
    priv->dma_chan = chan;

    if (!chan) {
        dev_err(dev,"DMA channel busy! \n");
        status = -1;
    }

    priv->dma_started = 0;

    return status;
}

/*
 * dma_exit() - stop any started transfer and give the channel back.
 * @tdev: driver instance.
 *
 * If a transfer is marked as started, terminates all activity on the channel
 * and tears the transfer down with dma_stop(). Releases the channel when one
 * is held. Always returns 0.
 */
int dma_exit(struct test_platform_device * tdev)
{
    struct dma_private_info * priv = _get_dmapip(tdev);

    if (priv->dma_started != 0) {
        dmaengine_terminate_all(priv->dma_chan);
        dma_stop(tdev);
        priv->dma_started = 0;
    }

    if (priv->dma_chan)
        dma_release_channel(priv->dma_chan);

    return 0;
}

在我的驅動程序源文件（ test_driver.c ）中：

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/version.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/interrupt.h>

#include "test_driver.h"

/* Module parameter (read-only, 0444): non-zero makes test_probe() use dma_start_block() instead of dma_start(). */
static int dma_block=0;
module_param_named(dma_block, dma_block, int, 0444);

/* Single file-scope driver instance; test_probe() binds it to the probed platform device. */
static struct test_platform_device tdev;

/*
 * Device-tree match table: binds this driver to nodes whose compatible
 * string is "custom,test-driver-1.0". The table is never modified, so it is
 * const; MODULE_DEVICE_TABLE exports it so the module can be matched to the
 * device by module-loading tools.
 */
static const struct of_device_id test_of_match[] = {
  { .compatible = "custom,test-driver-1.0", },
  {}
};
MODULE_DEVICE_TABLE(of, test_of_match);

static int test_probe(struct platform_device *op)
{
    int ret = 0;
    struct device * dev = &(op->dev);

    const struct of_device_id *match = of_match_device(test_of_match, &op->dev);

    if (!match)
        return -EINVAL;

    tdev.pdev = op;

    dma_init(&tdev);

    if(dma_block)
        ret = dma_start_block(&tdev);
    else
        ret = dma_start(&tdev);

    if(ret) {
        dev_err(dev,"Error to start DMA transaction! \n");
    } else {
        dev_info(dev,"DMA OK! \n");
    }

    return ret;
}

/*
 * test_remove() - unbind callback: shut the DMA side down.
 * @op: platform device being removed (unused; the file-scope tdev is used).
 *
 * Always returns 0.
 */
static int test_remove(struct platform_device *op)
{
    struct test_platform_device * instance = &tdev;

    dma_exit(instance);

    return 0;
}

/* Platform driver descriptor: probe/remove callbacks plus the OF match table used for binding. */
static struct platform_driver test_platform_driver = {
  .probe = test_probe,
  .remove = test_remove,
  .driver = {
    .name = "test-driver",
    .owner = THIS_MODULE,
    .of_match_table = test_of_match,
  },
};

/*
 * test_init() - module entry point: register the platform driver.
 *
 * Returns the result of platform_driver_register(), so a failed
 * registration makes the module load fail instead of being reported as
 * success.
 */
static int test_init(void)
{
    return platform_driver_register(&test_platform_driver);
}

static void test_exit(void)
{
    platform_driver_unregister(&test_platform_driver);
}

/* Register test_init()/test_exit() as the module load/unload entry points. */
module_init(test_init);
module_exit(test_exit);

/* Module metadata. */
MODULE_AUTHOR("klyone");
MODULE_DESCRIPTION("DMA SG test module");
MODULE_LICENSE("GPL");

但是，DMA從未調用我的回調函數，我不知道為什么會這樣。 也許我誤解了什么......

誰能幫助我？

提前致謝。

Answer 1

警告：我沒有一個明確的解決方案，但只是一些觀察和建議如何調試[基於多年的編寫/調試Linux設備驅動程序的經驗]。

我猜你之所以認為回調沒有被執行，是因為你沒有看到任何printk消息。 然而，回調是唯一會打印這些消息的地方。 那么，printk級別是否設置得足夠高，能讓你看到這些消息？ 我會在你的模塊init函數中添加一個dev_info，以證明它能按預期打印。

此外，如果dma_start沒有按預期工作，你[可能]就不會得到回調，因此我也會在那里添加一些dev_info調用（例如，在步驟7的調用之前和之后）。 我還注意到，dma_start中並非所有調用都檢查了錯誤返回值[可能沒有問題，或者該函數返回void，只是提一下，以防你漏掉了某個]。

此時應該注意，這里實際上有兩個問題：（1）您的DMA請求是否已成功啟動[並完成]？ （2）您是否收到了回調？

所以，我會把dma_complete中的一部分代碼拆分到（例如） dma_test_done 中。 后者執行相同的檢查，但只打印“完成”消息。 您可以在輪詢模式下調用它來驗證DMA是否完成。

所以，如果DMA[最終]完成了，那么問題就簡化為：為什么你沒有得到回調。 但是，如果DMA[甚至]都沒有完成，那就是一個更根本的問題。

這讓我想到一點。 您沒有展示任何調用dma_start的代碼，也沒有展示您如何等待完成。 我猜如果您的回調工作正常，它會發出某種喚醒，而基礎層（base level）代碼會等待這個喚醒。 或者，回調會執行請求的釋放/清理（deallocate / cleanup）工作（也就是說，您還需要編寫更多代碼）。

在步驟7，您正在調用dma_async_issue_pending ，它應該調用pl330_issue_pending 。 pl330_issue_pending將調用pl330_tasklet 。

pl330_tasklet是一個tasklet函數，但它也可以直接調用[在沒有活動請求時啟動DMA]。

pl330_tasklet將循環其“工作”隊列並將任何已完成的項目移動到其“已完成”隊列。 然后它嘗試啟動新請求。 然后它在其完成的隊列上循環並發出回調。

pl330_tasklet獲取回調指針，但如果它為null，則會被靜默忽略。 你已經設置了一個回調，但是驗證你設置回調的地方可能是好的，或者傳播到pl330_tasklet將從中獲取回調的位置。

當您發起調用時，一切都可能處於忙碌狀態，因此沒有已完成的請求，也沒有空間啟動新請求，於是沒有什么可以完成的。 在這種情況下， pl330_tasklet稍后會被再次調用。

所以，當dma_async_issue_pending返回時，可能還什么都沒有發生。 這很可能適用於您的情況。

pl330_tasklet嘗試通過調用fill_queue來啟動新的DMA。 它會通過判斷status != BUSY來檢查描述符是否[已經]處於忙碌狀態。 因此，您可能需要驗證您的描述符中該值是否正確。 否則，你永遠不會得到回調[甚至不會啟動任何DMA]。

然后， fill_queue將嘗試通過pl330_submit_req啟動請求。 但是，這可能會返回錯誤（例如隊列已經滿了），因此，事情也會延遲。

作為參考，請注意pl330_submit_req頂部的以下注釋：

Submit a list of xfers after which the client wants notification.
Client is not notified after each xfer unit, just once after all
xfer units are done or some error occurs.

我要做的是開始攻擊pl330.c並添加調試消息和交叉檢查。 如果您的系統是pl330為許多其他請求提供服務，您可以通過檢查設備的私有數據指針是否與您的匹配來限制調試消息。

特別是，您希望在請求實際啟動時收到消息，因此您可以在pl330_submit_req的末尾添加調試消息

然后，在pl330_tasklet為請求添加消息也會有所幫助。

這是兩個很好的起點。 但是，不要害怕根據需要添加更多的printk調用。 您可能會對所謂的[或未被調用]或以何種順序感到驚訝。

更新：

如果我使用阻塞行為安裝內核模塊，那么一切都會很好地初始化。 但是，dma_busy_loop函數顯示DMA描述符始終為IN PROGRESS且DMA事務永遠不會完成。 因此，不執行回調函數。 可能會發生什么？

做了一點研究。 Cookie只是遞增的序列號。 例如，如果您發出的請求被分解為[例如] 10個單獨的分散/收集操作[描述符]，則每個請求都會獲得唯一的cookie值。 cookie返回值是最新/最后一組（例如10）。

當你調用（1） dma_async_is_tx_complete 時，（2）它會調用chan->device->device_tx_status ，（3）也就是pl330_tx_status ，（4）后者再調用dma_cookie_status

旁注/提示：當我跟蹤它時，我只是在dmaengine.h和pl330.c之間來回翻轉。 它就像：看（1），它叫（2）。 那套在哪里？ 在pl330.c ，我推測。 所以，我抓住了字符串並獲得了pl330函數的名稱（即（3））。 所以，我去那里，看到它確實如此（4）。 那么......回到dmaengine.h ......

但是，當您進行外部調用時，您忽略了[設置為NULL]后兩個參數。 這些可能很有用，因為它們返回“最后”和“使用過的”cookie。 因此，即使您沒有完全完成，這些值也可能會發生變化並顯示出部分進展。

其中一個最終應該 >= “返回”的cookie值（即整個操作應該完成）。 因此，這將有助於區分可能發生的情況。

另外，還要注意在dmaengine.h ，下方dma_async_is_tx_complete ，有dma_async_is_complete 。 此函數根據您傳遞的cookie值以及“last”和“used”cookie值決定是返回DMA_COMPLETE還是DMA_IN_PROGRESS 。 它是被動的，並沒有在代碼路徑[AFAICT]中使用，但它確實顯示了如何自己計算完成。

Linux DMA：使用DMAengine進行分散 - 收集事務

問題描述

1 個解決方案

解決方案1
4 已采納 2016-05-09 20:24:00

Linux DMA：使用DMAengine進行分散 - 收集事務

問題描述

1 個解決方案

解決方案1 4 已采納 2016-05-09 20:24:00

解決方案1
4 已采納 2016-05-09 20:24:00