[英]4 slices of bytearray to compare using multithreading
我被困在項目的第二階段:將一個 byte[] 拆分為 4 個切片(以便讓四核 i5 CPU 滿載),然後為每個切片在各自的核心上啟動一個線程(執行比較任務)。
這樣做的原因是想加快兩個相同大小的 byte 數組之間的比較。我該如何將它多線程化?
// P/Invoke binding for the C runtime's memcmp exported by msvcrt.dll (cdecl calling convention).
// The caller in this file treats a return value of 0 as "the compared bytes are identical".
// NOTE(review): the native count parameter is a size_t; declaring it as long only matches the
// native width in a 64-bit process — confirm the target platform before relying on this.
[DllImport("msvcrt.dll", CallingConvention = CallingConvention.Cdecl)]
static extern int memcmp(byte[] b1, byte[] b2, long count);
/// <summary>
/// A read/write window over a contiguous range of an existing array.
/// No elements are copied: reads and writes go straight to the underlying array.
/// </summary>
/// <typeparam name="T">Element type of the underlying array.</typeparam>
class ArrayView<T> : IEnumerable<T>
{
    private readonly T[] array;
    private readonly int offset, count;

    /// <summary>Creates a view over <paramref name="count"/> elements starting at <paramref name="offset"/>.</summary>
    /// <param name="array">The array to expose a window of.</param>
    /// <param name="offset">Index in <paramref name="array"/> of the first element of the view.</param>
    /// <param name="count">Number of elements in the view.</param>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The requested range does not fit inside <paramref name="array"/>.</exception>
    public ArrayView(T[] array, int offset, int count)
    {
        // Fail at construction time instead of on the first (possibly much later) element access.
        if (array == null)
        {
            throw new ArgumentNullException("array");
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException("offset");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        // Written as a subtraction so that offset + count cannot overflow.
        if (offset > array.Length - count)
        {
            throw new ArgumentException("offset and count do not describe a range inside the array.");
        }

        this.array = array;
        this.offset = offset;
        this.count = count;
    }

    /// <summary>Number of elements visible through this view.</summary>
    public int Length { get { return count; } }

    /// <summary>Gets or sets the element at <paramref name="index"/>, relative to the start of the view.</summary>
    /// <exception cref="IndexOutOfRangeException"><paramref name="index"/> is outside [0, Length).</exception>
    public T this[int index]
    {
        get
        {
            if (index < 0 || index >= this.count)
            {
                throw new IndexOutOfRangeException();
            }
            return this.array[offset + index];
        }
        set
        {
            if (index < 0 || index >= this.count)
            {
                throw new IndexOutOfRangeException();
            }
            this.array[offset + index] = value;
        }
    }

    /// <summary>Enumerates the elements of the view in index order.</summary>
    public IEnumerator<T> GetEnumerator()
    {
        for (int i = offset; i < offset + count; i++)
        {
            yield return array[i];
        }
    }

    // Delegate to the generic enumerator instead of wrapping it in a second iterator,
    // so the caller's disposal reaches the real enumerator.
    IEnumerator IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }
}
// Loads a bitmap file and builds four ArrayView<byte> slices, then compares the first slice.
// NOTE(review): this snippet does not compile as shown — OrgArr, LoddAremainder, p1/p2/p3 and
// CapturedP1 are not declared in the visible code. Presumably OrgArr is meant to be LoadedArr
// and p1..p3 are meant to be LddP1..LddP3 — confirm against the full source.
public void CopmarArrSlice()
{
// Read the whole file into memory.
byte[] LoadedArr = File.ReadAllBytes("testFileCompare2Scr.bmp");
int LoddArLn = OrgArr.Length;
// Size used for each of the first three slices.
int range = (LoddArLn / 4) - LoddAremainder;
// Bytes not covered by four slices of size range; added to the last slice below.
int divisionremain = LoddArLn - (range * 4);
ArrayView<byte> LddP1 = new ArrayView<byte>(OrgArr, 0, range);
ArrayView<byte> LddP2 = new ArrayView<byte>(OrgArr, p1.Length, range);
ArrayView<byte> LddP3 = new ArrayView<byte>(OrgArr, (p1.Length + p2.Length), range);
// The fourth slice also takes the leftover bytes.
ArrayView<byte> LddP4 = new ArrayView<byte>(OrgArr, (p1.Length + p2.Length + p3.Length), range + divisionremain);
// NOTE(review): the AreEqual shown in this file takes byte[] arguments, while LddP1 is an
// ArrayView<byte>; the trailing "....Do Somthing" is a placeholder, not code.
if (AreEqual(LddP1, CapturedP1)) ....Do Somthing
}
/// <summary>
/// Compares two byte arrays for equal content using the native memcmp.
/// </summary>
/// <param name="a">First array; may be null.</param>
/// <param name="b">Second array; may be null.</param>
/// <returns>
/// True when both references are the same (including both null), or when both arrays
/// are non-null, have the same length and memcmp reports no difference; otherwise false.
/// </returns>
public bool AreEqual(byte[] a, byte[] b)
{
    // Same instance, or both null: nothing to compare.
    if (object.ReferenceEquals(a, b))
    {
        return true;
    }

    // Only hand the buffers to native code when both exist and have the same size.
    bool comparable = a != null && b != null && a.Length == b.Length;
    return comparable && memcmp(a, b, a.Length) == 0;
}
// Run the slice-and-compare routine defined above.
CopmarArrSlice();
在這種情況下,如何用 4 個線程(並行)來執行 AreEqual(使用 memcmp)的比較,讓每個 CPU 核心各負責一個切片?
我編寫了一個在可能的情況下利用多個核心的函數,但它似乎因 P/Invoke 調用而遭受嚴重的性能損失。我認為這個版本只有在比較非常大的數組時才有意義。
/// <summary>
/// Byte-array equality that splits the arrays into four ranges and compares each range
/// with the native memcmp from msvcrt.dll, running the four comparisons via Parallel.Invoke.
/// </summary>
static unsafe class NativeParallel
{
    // The native count parameter is a size_t, so it is declared pointer-sized here.
    [DllImport("msvcrt.dll", CallingConvention = CallingConvention.Cdecl)]
    static extern int memcmp(byte* b1, byte* b2, UIntPtr count);

    /// <summary>Returns true when <paramref name="a"/> and <paramref name="b"/> hold the same bytes.</summary>
    /// <param name="a">First array; may be null.</param>
    /// <param name="b">Second array; may be null.</param>
    /// <returns>
    /// True for the same reference (including both null) or for two arrays of equal length
    /// and identical content; false otherwise.
    /// </returns>
    public static bool AreEqual(byte[] a, byte[] b)
    {
        // The obvious optimizations
        if (a == b)
        {
            return true;
        }
        if (a == null || b == null)
        {
            return false;
        }
        if (a.Length != b.Length)
        {
            return false;
        }
        // Two distinct empty arrays are equal; there is no element to take the address of.
        if (a.Length == 0)
        {
            return true;
        }

        int quarter = a.Length / 4;
        int r0 = 0, r1 = 0, r2 = 0, r3 = 0;
        Parallel.Invoke(
            () => r0 = CompareRange(a, b, 0, quarter),
            () => r1 = CompareRange(a, b, quarter, quarter),
            () => r2 = CompareRange(a, b, quarter * 2, quarter),
            // The last range also covers the bytes left over by the integer division.
            () => r3 = CompareRange(a, b, quarter * 3, a.Length - (quarter * 3))
        );

        // Each result must be checked on its own: memcmp returns negative or positive values
        // for differences, so summing them can cancel out to zero for unequal arrays.
        return r0 == 0 && r1 == 0 && r2 == 0 && r3 == 0;
    }

    // Pins both arrays and runs memcmp over count bytes starting at index start.
    static int CompareRange(byte[] a, byte[] b, int start, int count)
    {
        if (count == 0)
        {
            return 0;
        }
        fixed (byte* ap = &a[start])
        fixed (byte* bp = &b[start])
        {
            return memcmp(ap, bp, (UIntPtr)(uint)count);
        }
    }
}
在大多數情況下,它實際上比優化的安全版本慢。
/// <summary>
/// Byte-array equality in managed code: the arrays are split into four ranges that are
/// compared via Parallel.Invoke, eight bytes at a time where possible.
/// </summary>
static class SafeParallel
{
    /// <summary>Returns true when <paramref name="a"/> and <paramref name="b"/> hold the same bytes.</summary>
    /// <param name="a">First array; may be null.</param>
    /// <param name="b">Second array; may be null.</param>
    /// <returns>
    /// True for the same reference (including both null) or for two arrays of equal length
    /// and identical content; false otherwise.
    /// </returns>
    public static bool AreEqual(byte[] a, byte[] b)
    {
        if (a == b)
        {
            return true;
        }
        if (a == null || b == null)
        {
            return false;
        }
        if (a.Length != b.Length)
        {
            return false;
        }

        bool b1 = false;
        bool b2 = false;
        bool b3 = false;
        bool b4 = false;
        int quarter = a.Length / 4;
        Parallel.Invoke(
            () => b1 = AreEqual(a, b, 0, quarter),
            () => b2 = AreEqual(a, b, quarter, quarter),
            () => b3 = AreEqual(a, b, quarter * 2, quarter),
            // The last range gets the remaining byte count, not the total array length.
            () => b4 = AreEqual(a, b, quarter * 3, a.Length - (quarter * 3))
        );
        return b1 && b2 && b3 && b4;
    }

    /// <summary>Compares <paramref name="length"/> bytes of both arrays beginning at index <paramref name="start"/>.</summary>
    static bool AreEqual(byte[] a, byte[] b, int start, int length)
    {
        int end = start + length;
        // Index where the last full 8-byte word of this range stops.
        int wordEnd = end - (length % 8);

        int i = start;
        // Bulk of the range: compare eight bytes per step.
        for (; i < wordEnd; i += 8)
        {
            if (BitConverter.ToInt64(a, i) != BitConverter.ToInt64(b, i))
            {
                return false;
            }
        }
        // Tail of the range: fewer than eight bytes, compared one by one.
        for (; i < end; i++)
        {
            if (a[i] != b[i])
            {
                return false;
            }
        }
        return true;
    }
}
我認為在單線程、經典的 C# 寫法中,你不必分割你的字節數組:
// Single-threaded variant: walk the loaded array one byte at a time.
// NOTE(review): AreEqyal is not defined in the visible code (presumably a typo for AreEqual),
// and it is called with a single byte rather than an array — confirm the intended comparison.
// "....Do Somthing" is a placeholder, not code.
foreach(byte currentArr in LoadedArr)
{
if (AreEqyal(currentArr, CapturedP1))
....Do Somthing
}
但是,如果要把工作負載分配給多個線程來處理每個字節,您必須使用以下寫法:
// Sized to the maximum thread count (16 in the author's case).
// NOTE(review): the array is indexed by ManagedThreadId below; that id is not guaranteed to be
// smaller than 16, so the index may fall outside the array — confirm.
int[] sums = new int[16];// optional, only used to observe how the workload was spread
// Handles a single byte of the loaded array; passed as the per-element callback to Parallel.ForEach.
// NOTE(review): AreEqyal is not defined in the visible code, and "....Do Somthing" is a placeholder.
public void ProcessMyByte(byte current)
{
if (AreEqyal(current, CapturedP1))
....Do Somthing
// Optional: record which thread handled this element.
sums[Thread.CurrentThread.ManagedThreadId]++;// count one more iteration for the thread that processed this byte
}
// Sketch of the entry point; the "...." lines are elided by the author, not code.
.... Main()...
{
....
// Read the whole file into memory.
byte[] LoadedArr = File.ReadAllBytes("testFileCompare2Scr.bmp");
// Hand every byte to ProcessMyByte; Parallel.ForEach decides how the elements are spread over threads.
Parallel.ForEach(LoadedArr, ProcessMyByte);
...
}
這樣,並行庫會替你管理線程,而且效果甚至更好:當一個線程空閒時,它會領取下一個任務,而不是像你那樣把數組分成 4 份、讓每個線程固定處理 Length / 4。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.