簡體   English   中英

我如何並行化這個 C# 算法?

[英]How can I parallelise this C# algorithm?

我是 C# 初學者,所以我對任務或線程一無所知。 我寫了這段代碼,我想使用某種並行或線程處理。

代碼涉及兩個數據表(A 和 B),我必須將 A 的每個單元格值與 B 的所有單元格進行比較。 B 只有一列和若干行。 A 可能有數百萬個單元格。 我用 for 循環來實現。 這是我想要並行化以加快處理速度的代碼部分:

  /// <summary>
  /// Accumulates the cells of <paramref name="Prop"/> into the ranges listed in
  /// <paramref name="Rango"/>. For every cell of <paramref name="Prop"/> that differs from
  /// <c>nullvalue</c>, the cell at the same position in <paramref name="Prof"/> is compared
  /// with column 0 of each <paramref name="Rango"/> row in order; the first row whose value
  /// is strictly greater receives the cell (added to "Sumatoria", "n" incremented).
  /// </summary>
  /// <param name="Prof">Table read at the same [row, column] positions as <paramref name="Prop"/>.</param>
  /// <param name="Prop">Table whose cell values are summed.</param>
  /// <param name="Rango">Range table; column 0 is the comparison threshold, columns 0 and 1 are copied to the output.</param>
  /// <returns>
  /// A table with columns "Prof Evaluar", "Profundidad", "Promedio", "Sumatoria" and "n",
  /// containing only the ranges that received at least one cell. "Promedio" is never written
  /// by this method and stays at its initial value 0.
  /// </returns>
  private DataTable CalculosPrincipales(DataTable Prof, DataTable Prop, DataTable Rango)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("Prof Evaluar", typeof(double));
            dt.Columns.Add("Profundidad", typeof(double));
            dt.Columns.Add("Promedio", typeof(double));
            dt.Columns.Add("Sumatoria", typeof(double));
            dt.Columns.Add("n", typeof(double));

            // One accumulator row per Rango row. The search loop below indexes dt.Rows[k] for
            // every k < Rango.Rows.Count, so creating one row fewer would make a hit on the
            // last range fail with an out-of-range index. A range that is never hit is
            // removed again at the end, so runs that used to succeed produce the same table.
            for (int i = 0; i < Rango.Rows.Count; i++)
            {
                dt.Rows.Add(Rango.Rows[i][0], Rango.Rows[i][1], 0, 0, 0);
            }

            for (int i = 0; i < Prop.Rows.Count; i++)
            {
                for (int j = 0; j < Prop.Columns.Count; j++)
                {
                    double prop_celda = Convert.ToDouble(Prop.Rows[i][j]);

                    if (prop_celda != nullvalue)
                    {
                        // Does not depend on k, so convert it once per cell, not once per range.
                        double prof_celda = Convert.ToDouble(Prof.Rows[i][j]);

                        for (int k = 0; k < Rango.Rows.Count; k++)
                        {
                            double prof_rango = Convert.ToDouble(Rango.Rows[k][0]);

                            if (prof_celda < prof_rango)
                            {
                                dt.Rows[k][3] = Convert.ToDouble(dt.Rows[k][3]) + prop_celda;
                                dt.Rows[k][4] = Convert.ToInt32(dt.Rows[k][4]) + 1;
                                break; // only the first matching range receives the cell
                            }
                        }
                    }
                }

                // Progress is advanced once per processed Prop row.
                progressBar.PerformStep();
                Application.DoEvents();
            }

            // Drop ranges that received no cell. Walking backwards keeps the remaining
            // indices valid while rows are removed.
            for (int i = dt.Rows.Count - 1; i >= 0; i--)
            {
                if (Convert.ToInt32(dt.Rows[i][4]) == 0)
                {
                    dt.Rows[i].Delete();
                }
            }

            return dt;
        }

如果表 A 有 10000 個單元格,此代碼運行速度很快;200000 個單元格時需要 5 分鍾,1000000 個時需要 20 分鍾。

這是算法並行化的示例。 但是,使用 DataTable 會帶來一些性能損失。 您應該考慮使用更合適的類。

我做了以下更改:

  • 將計算提取到一個單獨的類中。
  • 將計算拆分為 n 個任務。
  • 添加了對通過 CancellationTokenSource 取消的支持
  • 將主動進度報告替換為被動進度報告。
  • 添加異常處理

現在一切都在后台運行。 您不再阻塞或減慢 UI,只需啟動計算,並讓它在完成時通過回調通知您。

您可以手動設置線程數,也可以讓算法使用 CPU 內核數,從而最大限度地提高性能。

請注意,這不是理想的實現,它只是一個示例,未經測試。

在我看來,您的描述與代碼不太匹配(您談到了 2 個輸入表,但代碼適用於 3 個 - Prop 和 Prof 不一樣嗎?)

using System;
using System.Data;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Runs the range accumulation over <see cref="Prof"/>, <see cref="Prop"/> and
/// <see cref="Rango"/> on background tasks. The rows are split into contiguous stripes,
/// each stripe is accumulated into its own copy of the result table, and the partial
/// tables are summed once all stripes have finished.
/// </summary>
public class ParallelCalculation
{
    /// <summary>
    /// Completion callback. It is invoked exactly once per <see cref="Run"/>, on a
    /// background thread. <paramref name="result"/> is null when the calculation failed
    /// outside the stripes; <paramref name="exception"/> is the first error found, or null.
    /// </summary>
    public delegate void CompletionHandler(DataTable result, Exception exception);

    /// <summary>Input tables; they must be assigned before <see cref="Run"/> is called.</summary>
    public DataTable Prof, Prop, Rango;

    /// <summary>State of one stripe of rows, [FromRow, ToRow).</summary>
    class Part
    {
        public DataTable Result;
        public int FromRow, ToRow;
        public float Progress;
        public Exception Exception;
    }

    DataTable result;
    Part[] parts;
    Task[] tasks;
    CancellationToken cancellation;
    CompletionHandler callback;

    /// <summary>
    /// Starts the calculation in the background and returns immediately.
    /// </summary>
    /// <param name="callback">Invoked once when the calculation has finished, failed or been cancelled.</param>
    /// <param name="token">Checked after every processed cell; when cancellation is requested the stripes stop early.</param>
    /// <param name="threadCount">Number of stripes/tasks; 0 or less means <see cref="Environment.ProcessorCount"/>.</param>
    public void Run(CompletionHandler callback, CancellationToken token, int threadCount = 0)
    {
        this.cancellation = token;
        this.callback = callback;

        // Fire and forget: PerformAsync catches every calculation error itself and reports
        // it through the callback, so there is nothing to observe on the returned task.
        Task.Run(() => PerformAsync(threadCount));
    }

    /// <summary>
    /// Builds the result table, starts one task per stripe, waits for them, merges the
    /// partial results and finally invokes the callback.
    /// </summary>
    async Task PerformAsync(int threadCount)
    {
        DataTable table = null;
        Exception error = null;

        try
        {
            // Create table for results
            result = new DataTable();
            result.Columns.Add("Prof Evaluar", typeof(double));
            result.Columns.Add("Profundidad", typeof(double));
            result.Columns.Add("Promedio", typeof(double));
            result.Columns.Add("Sumatoria", typeof(double));
            result.Columns.Add("n", typeof(double));

            for (int i = 0; i < Rango.Rows.Count; i++)
            {
                result.Rows.Add(Rango.Rows[i][0], Rango.Rows[i][1], 0, 0, 0);
            }

            // Split the calculation into n stripes. The first (rowCount % n) stripes get
            // one extra row so that every row is covered exactly once.
            int n = threadCount > 0 ? threadCount : Environment.ProcessorCount;
            int rowsPerTask = Prof.Rows.Count / n;
            int rest = Prof.Rows.Count % n;

            var stripes = new Part[n];
            for (int i = 0, first = 0; i < n; ++i, --rest)
            {
                int end = first + rowsPerTask + (rest > 0 ? 1 : 0);
                stripes[i] = new Part { FromRow = first, ToRow = end };
                first = end;
            }

            // Publish the array only once it is fully populated, so that Progress() never
            // sees a null element when it is polled from another thread.
            parts = stripes;

            // Tasks work in parallel, each one processes its own stripe of data.
            tasks = new Task[n];
            for (int i = 0; i < n; ++i)
            {
                tasks[i] = Task.Factory.StartNew(CalculatePart, stripes[i]);
            }

            // Wait until all partial calculations are finished
            await Task.WhenAll(tasks).ConfigureAwait(false);

            // Sum partial results to the main result table (and find the first exception, if any)
            foreach (var part in stripes)
            {
                error = error ?? part.Exception;

                // Empty stripes, and stripes that failed before creating their table, have nothing to add.
                if (part.Result == null)
                {
                    continue;
                }

                for (int row = 0; row < result.Rows.Count; ++row)
                {
                    result.Rows[row][3] = Convert.ToDouble(result.Rows[row][3]) + Convert.ToDouble(part.Result.Rows[row][3]);
                    result.Rows[row][4] = Convert.ToInt32(result.Rows[row][4]) + Convert.ToInt32(part.Result.Rows[row][4]);
                }
            }

            // Remove empty rows from results (backwards, so removals do not shift unvisited rows)
            for (int i = result.Rows.Count - 1; i >= 0; i--)
            {
                if (Convert.ToInt32(result.Rows[i][4]) == 0)
                {
                    result.Rows[i].Delete();
                }
            }

            table = result;
        }
        catch (Exception e)
        {
            table = null;
            error = e;
        }

        // Call back outside the try block: an exception thrown by the callback itself must
        // not cause the callback to be invoked a second time.
        callback?.Invoke(table, error);
    }

    /// <summary>
    /// Accumulates one stripe of rows into the stripe's own copy of the result table.
    /// Errors are stored in <c>Part.Exception</c> instead of being thrown.
    /// </summary>
    void CalculatePart(object state)
    {
        var part = (Part)state;
        try
        {
            int cols = Prop.Columns.Count;
            int steps = cols * (part.ToRow - part.FromRow);

            // Nothing to do (more stripes than rows, or no columns): report the stripe as
            // complete, otherwise the overall progress could never reach 100%.
            if (steps <= 0)
            {
                part.Progress = 1f;
                return;
            }

            // Create our own table for partial results.
            part.Result = this.result.Copy();

            var partial = part.Result; // Just a shortcut

            for (int i = part.FromRow, step = 1; i < part.ToRow; i++)
            {
                for (int j = 0; j < cols; j++, step++)
                {
                    var prop_celda_obj = Prop.Rows[i][j];
                    if (prop_celda_obj != DBNull.Value)
                    {
                        double prop_celda = Convert.ToDouble(prop_celda_obj);
                        double prof_celda = Convert.ToDouble(Prof.Rows[i][j]);

                        for (int k = 0; k < Rango.Rows.Count; k++)
                        {
                            double prof_rango = Convert.ToDouble(Rango.Rows[k][0]);

                            if (prof_celda < prof_rango)
                            {
                                partial.Rows[k][3] = Convert.ToDouble(partial.Rows[k][3]) + prop_celda;
                                partial.Rows[k][4] = Convert.ToDouble(partial.Rows[k][4]) + 1;
                                break; // only the first matching range receives the cell
                            }
                        }
                    }

                    part.Progress = step / (float)steps;
                    if (cancellation.IsCancellationRequested)
                    {
                        return;
                    }
                }
            }
        }
        catch (Exception e)
        {
            part.Exception = e;
        }
    }

    /// <summary>
    /// Returns the average progress of all stripes in the range 0..1. Returns 0 when no
    /// stripes have been set up yet, e.g. when polled right after <see cref="Run"/>.
    /// </summary>
    public float Progress()
    {
        var snapshot = parts;
        if (snapshot == null || snapshot.Length == 0)
        {
            return 0.0f;
        }

        float sum = 0.0f;
        foreach (var part in snapshot)
        {
            sum += part.Progress;
        }

        return sum / snapshot.Length;
    }
}

以下代碼是在 Form 中使用上述類的示例。 你可能需要稍微調整一下。

/// <summary>
/// Example form that starts and stops a <see cref="ParallelCalculation"/> with a single
/// button and polls its progress with a timer.
/// </summary>
partial class MyForm
{
    Button btnStartStop;
    ProgressBar progressBar;

    // Do this somewhere:
    // btnStartStop.Click += BtnStartStop_Click;

    int threads = 0;              // 0 means "The number of CPU cores"
    DataTable Prof, Prop, Rango;  // You have to provide these values

    // The final results will be stored here:
    DataTable Result;

    CancellationTokenSource cancellation;
    ParallelCalculation calculation;
    System.Windows.Forms.Timer progressTimer;

    /// <summary>Toggles between starting a new calculation and cancelling the running one.</summary>
    void BtnStartStop_Click(object sender, EventArgs e)
    {
        if (calculation != null)
        {
            cancellation?.Cancel();
        }
        else
        {
            StartCalculation();
        }
    }

    /// <summary>Creates the calculation, starts it, and starts the progress polling timer.</summary>
    void StartCalculation()
    {
        cancellation = new CancellationTokenSource();
        calculation = new ParallelCalculation { Prof = this.Prof, Prop = this.Prop, Rango = this.Rango };
        calculation.Run(Finished, cancellation.Token, threads);

        progressBar.Value = progressBar.Minimum;
        progressTimer = new System.Windows.Forms.Timer(components) { Interval = 100 };
        progressTimer.Tick += ProgressTimer_Tick;
        progressTimer.Start();

        UpdateUI();
    }

    /// <summary>
    /// Completion callback passed to <see cref="ParallelCalculation.Run"/>. The work is
    /// marshalled to the UI thread with BeginInvoke before any control is touched.
    /// </summary>
    void Finished(DataTable table, Exception e)
    {
        // The form may have been closed while the calculation was running; BeginInvoke
        // needs a live window handle.
        if (IsDisposed || !IsHandleCreated)
        {
            return;
        }

        BeginInvoke((Action)delegate
        {
            Result = table;

            if (calculation != null)
            {
                progressBar.Value = ToProgressValue(calculation.Progress());
            }

            // A new timer and token source are created for every run, so release the
            // ones belonging to the finished run instead of letting them accumulate.
            if (progressTimer != null)
            {
                progressTimer.Stop();
                progressTimer.Tick -= ProgressTimer_Tick;
                progressTimer.Dispose();
                progressTimer = null;
            }

            if (cancellation != null)
            {
                cancellation.Dispose();
                cancellation = null;
            }

            calculation = null;

            UpdateUI();

            // Do not drop a failure silently.
            if (e != null)
            {
                MessageBox.Show(this, e.Message, "Calculation failed", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        });
    }

    /// <summary>Refreshes the progress bar while a calculation is running.</summary>
    private void ProgressTimer_Tick(object sender, EventArgs e)
    {
        if (calculation != null)
        {
            progressBar.Value = ToProgressValue(calculation.Progress());
        }
    }

    /// <summary>
    /// Converts a 0..1 fraction to a percentage clamped to the progress bar's range,
    /// because assigning a value outside Minimum..Maximum throws.
    /// </summary>
    int ToProgressValue(float fraction)
    {
        int percent = (int)(fraction * 100);
        return Math.Max(progressBar.Minimum, Math.Min(progressBar.Maximum, percent));
    }

    /// <summary>Shows "Stop" while a calculation is running and "Start" otherwise.</summary>
    void UpdateUI()
    {
        btnStartStop.Text = calculation == null ? "Start" : "Stop";
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM