[英]Bulk insert / copy iEnumerable into table with npgsql
我有一個方法,它接受一個 IEnumerable,遍歷其中的每個元素,並將每個元素插入到數據庫的一個表中。像這樣:
/// <summary>
/// Walks every element of <paramref name="fooData"/> and builds the INSERT
/// statement for table <c>foo</c>. Executing the statement is not part of
/// this snippet.
/// </summary>
/// <param name="fooData">Rows to insert; enumerated exactly once.</param>
public void Write(IEnumerable<foo> fooData)
{
    // foreach enumerates the sequence a single time. The previous
    // "index < fooData.Count()" condition re-counted (and, for lazy
    // sequences, re-enumerated) the whole input on every iteration.
    foreach (var row in fooData)
    {
        // Both values are parameter placeholders; the second one was missing
        // its '@' and would have been read as a column reference.
        var sql = @"insert into foo (col_id, col_name) values (@col_id, @col_name)";
    }
}
foo 是一個 class ,它反映了數據庫中的表:
/// <summary>
/// Row model whose properties correspond to the columns of the database table.
/// </summary>
public class Foo
{
    /// <summary>Value for the <c>col_id</c> column.</summary>
    public int col_id { get; set; }

    /// <summary>Value for the <c>col_name</c> column.</summary>
    public string col_name { get; set; }
}
事實證明,如果數據有數千行,使用 for 循環逐行插入效率不高。對於 IEnumerable,有什么更高效的方法來批量復制這些數據?
我編寫了一個用於 Postgres 的 class,其行為類似於 .NET 內置的 SqlBulkCopy class。它包裝了 COPY 命令以提供快速上傳。針對 IEnumerable 的方法如下所示(DataTable 也有類似的方法)。
/// <summary>
/// Bulk-loads <paramref name="data"/> into the table named by
/// <c>DestinationTableName</c> using a binary <c>COPY ... FROM STDIN</c>.
/// </summary>
/// <typeparam name="T">
/// Row type. Its public properties are read by reflection and written
/// positionally: property <c>i</c> goes to destination column <c>i</c>.
/// </typeparam>
/// <param name="data">Rows to upload; enumerated once.</param>
/// <exception cref="Exception">
/// Any failure (missing table name, column-count mismatch, unsupported column
/// type, invalid cast, database error) is wrapped in an <see cref="Exception"/>
/// whose <c>InnerException</c> carries the original error.
/// </exception>
public void WriteToServer<T>(IEnumerable<T> data)
{
    try
    {
        if (string.IsNullOrEmpty(DestinationTableName))
        {
            throw new ArgumentOutOfRangeException("DestinationTableName", "Destination table must be set");
        }
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        PropertyInfo[] properties = typeof(T).GetProperties();
        int colCount = properties.Length;
        NpgsqlDbType[] types = new NpgsqlDbType[colCount];
        int[] lengths = new int[colCount];
        string[] fieldNames = new string[colCount];

        // Read the destination schema so each value can be written with the
        // column's own type.
        // NOTE(review): DestinationTableName is concatenated into the SQL text;
        // it must come from trusted code, never from user input.
        using (var cmd = new NpgsqlCommand("SELECT * FROM " + DestinationTableName + " LIMIT 1", conn))
        using (var rdr = cmd.ExecuteReader())
        {
            if (rdr.FieldCount != colCount)
            {
                // Parameter name corrected: this overload's argument is "data".
                throw new ArgumentOutOfRangeException("data", "Column count in Destination Table does not match column count in source table.");
            }
            var columns = rdr.GetColumnSchema();
            for (int i = 0; i < colCount; i++)
            {
                types[i] = (NpgsqlDbType)columns[i].NpgsqlDbType;
                lengths[i] = columns[i].ColumnSize ?? 0;
                fieldNames[i] = columns[i].ColumnName;
            }
        }

        // string.Join also copes with an empty column list instead of indexing fieldNames[0].
        string columnList = string.Join(", ", fieldNames);

        using (var writer = conn.BeginBinaryImport("COPY " + DestinationTableName + " (" + columnList + ") FROM STDIN (FORMAT BINARY)"))
        {
            foreach (var item in data)
            {
                writer.StartRow();
                for (int i = 0; i < colCount; i++)
                {
                    // Fetch the value once; reflection calls are comparatively expensive.
                    object value = properties[i].GetValue(item);
                    if (value == null)
                    {
                        writer.WriteNull();
                        continue;
                    }

                    NpgsqlDbType type = types[i];
                    switch (type)
                    {
                        case NpgsqlDbType.Bigint:
                            writer.Write((long)value, type);
                            break;
                        case NpgsqlDbType.Bit:
                            if (lengths[i] > 1)
                            {
                                writer.Write((byte[])value, type);
                            }
                            else
                            {
                                writer.Write((byte)value, type);
                            }
                            break;
                        case NpgsqlDbType.Boolean:
                            writer.Write((bool)value, type);
                            break;
                        case NpgsqlDbType.Bytea:
                            writer.Write((byte[])value, type);
                            break;
                        case NpgsqlDbType.Char:
                            // Test the runtime value. The former
                            // properties[i].GetType() check inspected the
                            // PropertyInfo object itself and so never matched.
                            if (value is string)
                            {
                                writer.Write((string)value, type);
                            }
                            else if (value is Guid)
                            {
                                writer.Write(value.ToString(), type);
                            }
                            else if (lengths[i] > 1)
                            {
                                writer.Write((char[])value, type);
                            }
                            else
                            {
                                writer.Write(value.ToString().ToCharArray()[0], type);
                            }
                            break;
                        case NpgsqlDbType.Time:
                        case NpgsqlDbType.Timestamp:
                        case NpgsqlDbType.TimestampTz:
                        case NpgsqlDbType.Date:
                            writer.Write((DateTime)value, type);
                            break;
                        case NpgsqlDbType.Double:
                            writer.Write((double)value, type);
                            break;
                        case NpgsqlDbType.Integer:
                            // Conversion errors now propagate instead of being
                            // swallowed and followed by a second write of the
                            // same column.
                            if (value is int)
                            {
                                writer.Write((int)value, type);
                            }
                            else if (value is string)
                            {
                                writer.Write(Convert.ToInt32(value), type);
                            }
                            else
                            {
                                // Written as object, as before, for any other source type.
                                writer.Write(value, type);
                            }
                            break;
                        case NpgsqlDbType.Interval:
                            writer.Write((TimeSpan)value, type);
                            break;
                        case NpgsqlDbType.Numeric:
                        case NpgsqlDbType.Money:
                            writer.Write((decimal)value, type);
                            break;
                        case NpgsqlDbType.Real:
                            writer.Write((Single)value, type);
                            break;
                        case NpgsqlDbType.Smallint:
                            // A byte is widened explicitly. Failures are no longer
                            // swallowed, because a skipped column leaves the row
                            // with too few fields.
                            if (value is byte)
                            {
                                writer.Write(Convert.ToInt16(value), type);
                            }
                            else
                            {
                                writer.Write((short)value, type);
                            }
                            break;
                        case NpgsqlDbType.Varchar:
                        case NpgsqlDbType.Text:
                            writer.Write((string)value, type);
                            break;
                        case NpgsqlDbType.Uuid:
                            writer.Write((Guid)value, type);
                            break;
                        case NpgsqlDbType.Xml:
                            writer.Write((string)value, type);
                            break;
                        default:
                            // Skipping the column silently would desynchronise the row.
                            throw new NotSupportedException("Column '" + fieldNames[i] + "' has unsupported type " + type + ".");
                    }
                }
            }
            writer.Complete();
        }
    }
    catch (Exception ex)
    {
        throw new Exception("Error executing NpgSqlBulkCopy.WriteToServer(). See inner exception for details", ex);
    }
}
您需要先設置屬性 DestinationTableName 並且 conn 需要是一個打開的連接。
本質上,該方法使用 Reflection 來獲取所傳入列表的元素類型的屬性。顯然,這些屬性的數據類型必須與要填充的表的列類型相匹配。方法遍歷列表並將每一行寫入 writer,然后在最后一次性完成批量插入。我可能沒有處理你需要的所有類型,但缺少的類型應該很容易照樣添加。
編輯
被要求分享它,這里是 DataTable 的等價物:
/// <summary>
/// Bulk-loads every row of <paramref name="dataTable"/> into the table named
/// by <c>DestinationTableName</c> using a binary <c>COPY ... FROM STDIN</c>.
/// Values are written positionally: source column <c>i</c> goes to
/// destination column <c>i</c>.
/// </summary>
/// <param name="dataTable">Source rows; <c>DBNull</c> cells are written as NULL.</param>
/// <exception cref="Exception">
/// Any failure (missing table name, column-count mismatch, unsupported column
/// type, invalid cast, database error) is wrapped in an <see cref="Exception"/>
/// whose <c>InnerException</c> carries the original error.
/// </exception>
public void WriteToServer(DataTable dataTable)
{
    try
    {
        if (string.IsNullOrEmpty(DestinationTableName))
        {
            throw new ArgumentOutOfRangeException("DestinationTableName", "Destination table must be set");
        }
        if (dataTable == null)
        {
            throw new ArgumentNullException("dataTable");
        }

        int colCount = dataTable.Columns.Count;
        NpgsqlDbType[] types = new NpgsqlDbType[colCount];
        int[] lengths = new int[colCount];
        string[] fieldNames = new string[colCount];

        // Read the destination schema so each value can be written with the
        // column's own type.
        // NOTE(review): DestinationTableName is concatenated into the SQL text;
        // it must come from trusted code, never from user input.
        using (var cmd = new NpgsqlCommand("SELECT * FROM " + DestinationTableName + " LIMIT 1", conn))
        using (var rdr = cmd.ExecuteReader())
        {
            if (rdr.FieldCount != colCount)
            {
                throw new ArgumentOutOfRangeException("dataTable", "Column count in Destination Table does not match column count in source table.");
            }
            var columns = rdr.GetColumnSchema();
            for (int i = 0; i < colCount; i++)
            {
                types[i] = (NpgsqlDbType)columns[i].NpgsqlDbType;
                lengths[i] = columns[i].ColumnSize ?? 0;
                fieldNames[i] = columns[i].ColumnName;
            }
        }

        // string.Join also copes with an empty column list instead of indexing fieldNames[0].
        string columnList = string.Join(", ", fieldNames);

        using (var writer = conn.BeginBinaryImport("COPY " + DestinationTableName + " (" + columnList + ") FROM STDIN (FORMAT BINARY)"))
        {
            foreach (DataRow row in dataTable.Rows)
            {
                writer.StartRow();
                for (int i = 0; i < colCount; i++)
                {
                    // Look the cell up once instead of on every test and cast.
                    object value = row[i];
                    if (value == DBNull.Value)
                    {
                        writer.WriteNull();
                        continue;
                    }

                    NpgsqlDbType type = types[i];
                    switch (type)
                    {
                        case NpgsqlDbType.Bigint:
                            writer.Write((long)value, type);
                            break;
                        case NpgsqlDbType.Bit:
                            if (lengths[i] > 1)
                            {
                                writer.Write((byte[])value, type);
                            }
                            else
                            {
                                writer.Write((byte)value, type);
                            }
                            break;
                        case NpgsqlDbType.Boolean:
                            writer.Write((bool)value, type);
                            break;
                        case NpgsqlDbType.Bytea:
                            writer.Write((byte[])value, type);
                            break;
                        case NpgsqlDbType.Char:
                            if (value is string)
                            {
                                writer.Write((string)value, type);
                            }
                            else if (value is Guid)
                            {
                                writer.Write(value.ToString(), type);
                            }
                            else if (lengths[i] > 1)
                            {
                                writer.Write((char[])value, type);
                            }
                            else
                            {
                                writer.Write(value.ToString().ToCharArray()[0], type);
                            }
                            break;
                        case NpgsqlDbType.Time:
                        case NpgsqlDbType.Timestamp:
                        case NpgsqlDbType.TimestampTz:
                        case NpgsqlDbType.Date:
                            writer.Write((DateTime)value, type);
                            break;
                        case NpgsqlDbType.Double:
                            writer.Write((double)value, type);
                            break;
                        case NpgsqlDbType.Integer:
                            // Conversion errors now propagate instead of being
                            // swallowed and followed by a second write of the
                            // same column.
                            if (value is int)
                            {
                                writer.Write((int)value, type);
                            }
                            else if (value is string)
                            {
                                writer.Write(Convert.ToInt32(value), type);
                            }
                            else
                            {
                                // Written as object, as before, for any other source type.
                                writer.Write(value, type);
                            }
                            break;
                        case NpgsqlDbType.Interval:
                            writer.Write((TimeSpan)value, type);
                            break;
                        case NpgsqlDbType.Numeric:
                        case NpgsqlDbType.Money:
                            writer.Write((decimal)value, type);
                            break;
                        case NpgsqlDbType.Real:
                            writer.Write((Single)value, type);
                            break;
                        case NpgsqlDbType.Smallint:
                            // Failures are no longer swallowed, because a skipped
                            // column leaves the row with too few fields.
                            if (value is byte)
                            {
                                writer.Write(Convert.ToInt16(value), type);
                            }
                            else
                            {
                                writer.Write((short)value, type);
                            }
                            break;
                        case NpgsqlDbType.Varchar:
                        case NpgsqlDbType.Text:
                            writer.Write((string)value, type);
                            break;
                        case NpgsqlDbType.Uuid:
                            writer.Write((Guid)value, type);
                            break;
                        case NpgsqlDbType.Xml:
                            writer.Write((string)value, type);
                            break;
                        default:
                            // Skipping the column silently would desynchronise the row.
                            throw new NotSupportedException("Column '" + fieldNames[i] + "' has unsupported type " + type + ".");
                    }
                }
            }
            writer.Complete();
        }
    }
    catch (Exception ex)
    {
        throw new Exception("Error executing NpgSqlBulkCopy.WriteToServer(). See inner exception for details", ex);
    }
}
與IEnumerable
實現一樣,可能缺少一些數據類型,但很容易擴展。
我使用 PostgreSQLCopyHelper(https://github.com/PostgreSQLCopyHelper/PostgreSQLCopyHelper),它非常快。
這是一個示例,我正在加載我的自定義 class 的實例列表(稱為 single_bar,它有 6 個變量,而不是像你的 Foo class 那樣只有 2 個)。Copy helper 將 class 中的變量映射到您要寫入的列。
// Build a copy helper for single_bar rows targeting the table named by
// temp_table_name. Each Map* call pairs a column name with a lambda that
// reads the value from a single_bar instance (the last one calls
// getrealvol() rather than reading a member directly).
PostgreSQLCopyHelper<single_bar> copyHelper = new PostgreSQLCopyHelper<single_bar>(temp_table_name)
.MapTimeStamp("bar_time", x => x.bar_time)
.MapReal("open_px", x => x.open_px)
.MapReal("high_px", x => x.high_px)
.MapReal("low_px", x => x.low_px)
.MapReal("close_px", x => x.close_px)
.MapReal("trade_volume", x => x.getrealvol());
// Upload the data to the temp table over the connection conn.
// NOTE(review): the ulong returned by SaveAll is presumably the number of
// rows written — confirm against the PostgreSQLCopyHelper documentation.
ulong NoUploaded = copyHelper.SaveAll(conn, dataset);
// Convert.ToInt32 throws OverflowException if the value exceeds int.MaxValue.
result = Convert.ToInt32(NoUploaded);
我創建了一個臨時表作為復制操作的目標,然后對最終表執行 upsert。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.