[英]Improve Binary Serialization Performance for large List of structs
我有一個結構體,用3個整數保存一個三維坐標。 在測試中,我建立了一個包含一百萬個隨機點的List<>,然后用二進制序列化把它寫入內存流。
內存流大約為21 MB - 這似乎非常低效,因為1000000個點 * 3個坐標 * 4個字節,最少只需要約11MB
它在我的測試台上也需要約3秒鍾。
有什么改善性能和/或尺寸的想法?
(如果有幫助的話:我不必保留ISerializable接口,我可以直接寫入內存流)
編輯 - 根據下面的答案,我整理了一個序列化對比測試,比較BinaryFormatter、“原始”BinaryWriter和Protobuf:
using System;
using System.Text;
using System.Collections.Generic;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Formatters.Binary;
using System.IO;
using ProtoBuf;
namespace asp_heatmap.test
{
/// <summary>
/// Holds a list of 3-D integer points and exposes three ways to persist it:
/// the framework BinaryFormatter (through <see cref="ISerializable"/>),
/// protobuf-net (through the ProtoContract/ProtoMember attributes) and a
/// hand-written count-prefixed binary layout.
/// </summary>
/// <remarks>
/// BinaryFormatter support is kept only for the comparison; it should not be
/// used to deserialize untrusted data.
/// </remarks>
[Serializable()] // For .NET BinaryFormatter
[ProtoContract] // For Protobuf
public class Coordinates : ISerializable
{
    /// <summary>A single point made of three 32-bit integer components.</summary>
    [Serializable()]
    [ProtoContract]
    public struct CoOrd
    {
        public CoOrd(int x, int y, int z)
        {
            this.x = x;
            this.y = y;
            this.z = z;
        }

        [ProtoMember(1)]
        public int x;
        [ProtoMember(2)]
        public int y;
        [ProtoMember(3)]
        public int z;
    }

    // Size of one point in the raw layout: three 32-bit integers.
    private const int BytesPerPoint = 3 * sizeof(int);

    internal Coordinates()
    {
    }

    /// <summary>The points held by this instance.</summary>
    [ProtoMember(1)]
    public List<CoOrd> Coords = new List<CoOrd>();

    /// <summary>
    /// Appends <paramref name="count"/> random points to <see cref="Coords"/>.
    /// </summary>
    /// <param name="count">Number of points to append; defaults to one million.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    public void SetupTestArray(int count = 1000000)
    {
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count", "count must not be negative.");
        }

        Random r = new Random();

        // Grow the backing array once instead of repeatedly while adding.
        int needed = Coords.Count + count;
        if (Coords.Capacity < needed)
        {
            Coords.Capacity = needed;
        }

        for (int i = 0; i < count; i++)
        {
            Coords.Add(new CoOrd(r.Next(), r.Next(), r.Next()));
        }
    }

    #region Using Framework Binary Formatter Serialization
    /// <summary>Stores the point list under the key "Coords".</summary>
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }

        info.AddValue("Coords", this.Coords);
    }

    /// <summary>Deserialization constructor: restores the list stored under "Coords".</summary>
    internal Coordinates(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }

        this.Coords = (List<CoOrd>)info.GetValue("Coords", typeof(List<CoOrd>));
    }
    #endregion

    #region 'Raw' Binary Writer serialization
    /// <summary>
    /// Writes the point count followed by x, y, z of every point as 32-bit integers.
    /// </summary>
    /// <returns>
    /// The stream that was written. Its position is left at the end of the data,
    /// so callers must rewind it before reading.
    /// </returns>
    public MemoryStream RawSerializeToStream()
    {
        // Presize for the count prefix plus every point so the buffer never regrows.
        MemoryStream stream = new MemoryStream(Coords.Count * BytesPerPoint + sizeof(int));

        // The writer is intentionally not disposed: disposing it would close
        // the stream that is handed back to the caller.
        BinaryWriter writer = new BinaryWriter(stream);
        writer.Write(Coords.Count);
        foreach (CoOrd point in Coords)
        {
            writer.Write(point.x);
            writer.Write(point.y);
            writer.Write(point.z);
        }

        // Make sure nothing is left buffered in the writer before returning.
        writer.Flush();
        return stream;
    }

    /// <summary>
    /// Rebuilds the point list from the layout produced by
    /// <see cref="RawSerializeToStream"/>, reading from the stream's current position.
    /// </summary>
    /// <param name="stream">
    /// Stream positioned at the count prefix. It is closed when the constructor
    /// returns, because the reader that wraps it is disposed.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="InvalidDataException">The count prefix is negative.</exception>
    /// <exception cref="EndOfStreamException">The stream holds fewer points than the prefix claims.</exception>
    public Coordinates(MemoryStream stream)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        using (BinaryReader reader = new BinaryReader(stream))
        {
            int count = reader.ReadInt32();
            if (count < 0)
            {
                throw new InvalidDataException("The point count read from the stream is negative.");
            }

            // Check the prefix against the bytes actually present before
            // allocating, so a corrupt count cannot trigger a huge allocation.
            long available = (stream.Length - stream.Position) / BytesPerPoint;
            if (count > available)
            {
                throw new EndOfStreamException("The stream is too short for the point count it declares.");
            }

            Coords = new List<CoOrd>(count);
            for (int i = 0; i < count; i++)
            {
                Coords.Add(new CoOrd(reader.ReadInt32(), reader.ReadInt32(), reader.ReadInt32()));
            }
        }
    }
    #endregion
}
/// <summary>
/// Round-trips a <see cref="Coordinates"/> instance through each serializer,
/// prints the serialized size and verifies that the points survive unchanged.
/// </summary>
[TestClass]
public class SerializationTest
{
    [TestMethod]
    public void TestBinaryFormatter()
    {
        Coordinates c = new Coordinates();
        c.SetupTestArray();

        // Serialize to memory stream
        using (MemoryStream mStream = new MemoryStream())
        {
            BinaryFormatter bformatter = new BinaryFormatter();
            bformatter.Serialize(mStream, c);
            Console.WriteLine("Length : {0}", mStream.Length);

            // Now Deserialize
            mStream.Position = 0;
            Coordinates c2 = (Coordinates)bformatter.Deserialize(mStream);
            Console.Write(c2.Coords.Count);

            AssertSamePoints(c, c2);
        }
    }

    [TestMethod]
    public void TestBinaryWriter()
    {
        Coordinates c = new Coordinates();
        c.SetupTestArray();

        using (MemoryStream mStream = c.RawSerializeToStream())
        {
            Console.WriteLine("Length : {0}", mStream.Length);

            // Now Deserialize
            mStream.Position = 0;
            Coordinates c2 = new Coordinates(mStream);
            Console.Write(c2.Coords.Count);

            AssertSamePoints(c, c2);
        }
    }

    [TestMethod]
    public void TestProtoBufV2()
    {
        Coordinates c = new Coordinates();
        c.SetupTestArray();

        using (MemoryStream mStream = new MemoryStream())
        {
            ProtoBuf.Serializer.Serialize(mStream, c);
            Console.WriteLine("Length : {0}", mStream.Length);

            mStream.Position = 0;
            Coordinates c2 = ProtoBuf.Serializer.Deserialize<Coordinates>(mStream);
            Console.Write(c2.Coords.Count);

            AssertSamePoints(c, c2);
        }
    }

    // Fails the test unless both instances hold the same points in the same order.
    private static void AssertSamePoints(Coordinates expected, Coordinates actual)
    {
        Assert.IsNotNull(actual);
        Assert.AreEqual(expected.Coords.Count, actual.Coords.Count, "Point count changed during the round trip.");

        // Plain field comparison keeps the loop free of boxing for a million points.
        for (int i = 0; i < expected.Coords.Count; i++)
        {
            Coordinates.CoOrd before = expected.Coords[i];
            Coordinates.CoOrd after = actual.Coords[i];
            if (before.x != after.x || before.y != after.y || before.z != after.z)
            {
                Assert.Fail("Point {0} differs after the round trip.", i);
            }
        }
    }
}
}
結果(注意PB v2.0.0.423 beta)
Serialize | Ser + Deserialize | Size
-----------------------------------------------------------
BinaryFormatter 2.89s | 26.00s !!! | 21.0 MB
ProtoBuf v2 0.52s | 0.83s | 18.7 MB
Raw BinaryWriter 0.27s | 0.36s | 11.4 MB
顯然,這只是關注速度/尺寸,並沒有考慮其他任何事情。
使用BinaryFormatter進行二進制序列化時,生成的字節中會包含類型信息。 這占用了額外的空間。 例如,在您不知道另一端需要什么樣的數據結構的情況下,這些類型信息非常有用。
在您的情況下,您知道數據在兩端的格式,並且聽起來不會改變。 所以你可以編寫一個簡單的編碼和解碼方法。 您的CoOrd類不再需要可序列化。
我會使用System.IO.BinaryReader和System.IO.BinaryWriter,然后遍歷每個CoOrd實例,向流寫入(或從流讀取)它的X、Y、Z三個值。 假設您的許多數字小於0x7F和0x7FFF,這些類甚至可以把您的整數打包到小於11MB。 (注意:BinaryWriter.Write(int)本身固定寫入4個字節,不會壓縮較小的數值;要獲得這種效果,需要改用7位可變長度編碼。)
像這樣的東西:
// Writes the points as a count prefix followed by three 32-bit integers each.
// Disposing the writer at the end of the using block also closes the stream.
using (var writer = new BinaryWriter(stream)) {
    // write the number of items so we know how many to read out
    writer.Write(points.Count);
    // write three ints per point; CoOrd declares its components as the
    // lowercase fields x, y and z
    foreach (var point in points) {
        writer.Write(point.x);
        writer.Write(point.y);
        writer.Write(point.z);
    }
}
要從流中讀取:
// Rebuilds the list from the count-prefixed layout: one Int32 holding the
// number of points, then three Int32 values per point.
List<CoOrd> points;
using (BinaryReader input = new BinaryReader(stream)) {
    int total = input.ReadInt32();
    points = new List<CoOrd>(total);
    // Arguments are evaluated left to right, so the components are read in x, y, z order.
    while (points.Count < total) {
        points.Add(new CoOrd(input.ReadInt32(), input.ReadInt32(), input.ReadInt32()));
    }
}
如果想省事、直接使用現成的序列化器,我推薦protobuf-net; 下面是protobuf-net v2的用法,只添加了一些特性(attribute):
/// <summary>
/// Holds a list of 3-D integer points, annotated with DataContract/DataMember
/// attributes so that the members to serialize and their order are declared.
/// </summary>
[DataContract]
public class Coordinates
{
    /// <summary>A single point made of three integer components.</summary>
    [DataContract]
    public struct CoOrd
    {
        public CoOrd(int x, int y, int z)
        {
            this.x = x;
            this.y = y;
            this.z = z;
        }

        [DataMember(Order = 1)]
        int x;
        [DataMember(Order = 2)]
        int y;
        [DataMember(Order = 3)]
        int z;
    }

    /// <summary>The points held by this instance.</summary>
    [DataMember(Order = 1)]
    public List<CoOrd> Coords = new List<CoOrd>();

    /// <summary>
    /// Appends <paramref name="count"/> pseudo-random points to <see cref="Coords"/>,
    /// each component drawn from 0 (inclusive) to 10000 (exclusive).
    /// </summary>
    /// <param name="count">Number of points to append; defaults to one million.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    public void SetupTestArray(int count = 1000000)
    {
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count", "count must not be negative.");
        }

        // Fixed seed: every run generates the same sequence of points.
        Random r = new Random(123456);

        // Grow the backing array once instead of repeatedly while adding.
        int needed = Coords.Count + count;
        if (Coords.Capacity < needed)
        {
            Coords.Capacity = needed;
        }

        for (int i = 0; i < count; i++)
        {
            Coords.Add(new CoOrd(r.Next(10000), r.Next(10000), r.Next(10000)));
        }
    }
}
使用:
// Serialize c into mStream with protobuf-net (c is presumably the Coordinates instance defined above).
ProtoBuf.Serializer.Serialize(mStream, c);
序列化。 這需要10,960,823個字節,但請注意,我修改了SetupTestArray,把坐標值限制在10,000以內,因為默認情況下它對整數使用“varint”編碼,編碼后的長度取決於數值的大小。 這里選擇10,000並沒有特殊含義(事實上我沒有查過varint長度變化的各個“臺階”在哪里)。 如果您更喜歡固定大小的編碼(允許任何范圍):
// Replacement declarations for the three CoOrd fields: DataFormat.FixedSize
// asks protobuf-net for a fixed-size encoding instead of the default varint
// encoding, so the encoded size no longer depends on the value's magnitude.
[ProtoMember(1, DataFormat = DataFormat.FixedSize)]
int x;
[ProtoMember(2, DataFormat = DataFormat.FixedSize)]
int y;
[ProtoMember(3, DataFormat = DataFormat.FixedSize)]
int z;
這需要16,998,640字節
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.