[英]Looping through OleDbDataReader .Read to produce delimited text file. Why does this code slow down as it loops?
我有一个 VB.NET 应用程序,它使用 Oracle OleDbDataReader 从 Oracle 数据库中提取约 1500 万行并将它们写入 | 分隔的文本文件。
''' <summary>
''' Streams the result set of the report's SQL into a pipe-delimited text file.
''' Rows are accumulated in a StringBuilder and appended to the file every
''' <c>batchSize</c> rows; progress and timing are logged per batch.
''' </summary>
''' <param name="report">Report whose <c>Sql</c> is executed and whose <c>ReportName</c> is used in log messages.</param>
''' <param name="filename">Target file; it is first written with the column header, then appended to.</param>
Private Sub GenerateTextSqlReportWithCurrent(report As TblreportEntity, filename As String)
    Const batchSize = 20000
    Dim encryption As New ClassEncrypt

    'get data
    LogEvent($"INFO: Opening DataReader for report {report.ReportName}")

    ' Using guarantees the reader is closed even when an exception is thrown.
    ' NOTE(review): if IMOracle2.GetDataReader opens the reader with
    ' CommandBehavior.CloseConnection, closing the reader also closes the
    ' underlying connection; previously neither was ever released.
    Using reader As OleDbDataReader = IMOracle2.GetDataReader(report.Sql, IMOracle2.GetConnectString(My.Settings.DB_Instance, encryption.Decrypt(My.Settings.DB_UserID), encryption.Decrypt(My.Settings.DB_PWD)))
        LogEvent($"INFO: Finished Opening DataReader for report {report.ReportName}")
        LogEvent($"INFO: writing {report.ReportName} to {filename}")
        WriteToFile(filename, GetColumnTitlesHeader(reader), False)

        Dim batch As New StringBuilder()
        Dim lastReport As DateTime = DateTime.Now()
        Dim rowCount As Integer

        ' The column count does not change while iterating one result set,
        ' so read it once instead of twice per field.
        Dim lastFieldIndex As Integer = reader.FieldCount - 1

        While reader.Read()
            For i = 0 To lastFieldIndex
                ' Strip characters that would break the one-row-per-line,
                ' pipe-delimited layout.
                Dim output As String
                output = Replace(reader.GetValue(i).ToString, vbCr, String.Empty)
                output = Replace(output, vbLf, String.Empty)
                output = Replace(output, "|", String.Empty)
                batch.Append(output)
                If i < lastFieldIndex Then
                    batch.Append("|")
                End If
            Next i
            batch.Append(vbCrLf)
            rowCount += 1

            ' Flush a full batch to disk and log how long fetching and writing took.
            If rowCount Mod batchSize = 0 Then
                Dim now = Date.Now
                Dim sinceLastSeconds = DateDiff(DateInterval.Second, lastReport, now)
                lastReport = now
                LogEvent($"INFO: Processing row {rowCount} {sinceLastSeconds}s since last")
                Dim fileWriteStart = Date.Now
                WriteToFile(filename, batch.ToString(), True)
                Dim fileWriteSeconds = DateDiff(DateInterval.Second, fileWriteStart, Date.Now)
                LogEvent($"INFO: Finished Writing another {batchSize} row(s) to file in {fileWriteSeconds}s for {report.ReportName}")
                batch.Clear()
            End If
        End While

        ' Write whatever is left over from the last, partial batch.
        WriteToFile(filename, batch.ToString(), True)
        LogEvent($"INFO: Finished Writing last row(s) to {filename} for {report.ReportName}")
    End Using
End Sub
''' <summary>
''' Opens a new OLE DB connection, executes <paramref name="strSQL"/> and returns
''' the open data reader.
''' </summary>
''' <param name="strSQL">SQL text to execute.</param>
''' <param name="strConnection">OLE DB connection string.</param>
''' <returns>
''' An open reader created with <c>CommandBehavior.CloseConnection</c>: the caller
''' must close or dispose the reader, which in turn closes the connection.
''' </returns>
Public Shared Function GetDataReader(ByVal strSQL As String, ByVal strConnection As String) As OleDb.OleDbDataReader
    Dim cnn As New OleDb.OleDbConnection(strConnection)
    Try
        ' The command is deliberately not disposed here: the returned reader
        ' is still in use by the caller after this function returns.
        Dim cmd As New OleDbCommand(strSQL, cnn)
        cnn.Open()
        Return cmd.ExecuteReader(CommandBehavior.CloseConnection)
    Catch
        ' No reader was handed out, so nobody else can close the connection.
        cnn.Dispose()
        Throw
    End Try
End Function
当这个 Sub 启动时,它会在不到 1 秒的时间内将一批行写入文本文件
07/12/2021 16:41:03: INFO: Finished Writing another 20000 row(s) to file in 0s for TAG_ATTRIBUTES
07/12/2021 16:41:03: INFO: Processing row 100000 0s since last
每个批次都比前一个批次稍慢;到 250 万行时,已经减慢到每批次约 9 秒:
07/12/2021 16:51:47: INFO: Processing row 2560000 9s since last
07/12/2021 16:51:37: INFO: Finished Writing another 20000 row(s) to file in 0s for TAG_ATTRIBUTES
到 15,000,000 行时:
08/12/2021 05:23:07: INFO: Processing row 15000000 145s since last
08/12/2021 05:20:42: INFO: Finished Writing another 20000 row(s) to file in 0s for TAG_ATTRIBUTES
在 Visual Studio 诊断工具中监控时,应用程序中的进程 Memory 使用率始终低于 100MB。
运行环境是 .NET Framework 4,目标平台为 AnyCPU。
我想知道什么可能导致逐渐放缓?
我也尝试过先在 StringBuilder 中构建输出文件的全部内容再一次性写入。结果出现了同样的逐渐减速,而且随着 StringBuilder 不断填充,内存使用量达到了 GB 级别。
为了演示我在评论中所说的内容:Using 块会在结束时关闭并释放(Dispose)其中声明的对象。
''' <summary>
''' Executes <paramref name="strSQL"/> and loads the complete result set into an
''' in-memory DataTable. Connection, command and reader are released by their
''' Using blocks before the table is returned.
''' </summary>
''' <param name="strSQL">SQL text to execute.</param>
''' <param name="strConnection">OLE DB connection string.</param>
''' <returns>A DataTable holding every row returned by the query.</returns>
Public Shared Function GetDataTable(ByVal strSQL As String, ByVal strConnection As String) As DataTable
    ' The return type is System.Data.DataTable; there is no OleDbDataTable
    ' type in System.Data.OleDb.
    Dim dt As New DataTable
    Using cnn As New OleDb.OleDbConnection(strConnection),
          cmd As New OleDbCommand(strSQL, cnn)
        cnn.Open()
        Using reader = cmd.ExecuteReader
            dt.Load(reader)
        End Using
    End Using
    Return dt
End Function
以下是 GenerateTextSqlReportWithCurrent 中需要相应修改的部分。
' Header line: column names joined with the same "|" delimiter used for the data rows.
' A LINQ query is not an array, so it has to be materialised with ToArray()
' before it can be assigned to String().
Dim ColumnNames As String() = (From dc As DataColumn In dt.Columns
                               Select dc.ColumnName).ToArray()
Dim strNames = String.Join("|", ColumnNames)
WriteToFile(filename, strNames, False)

Dim lastColumnIndex As Integer = dt.Columns.Count - 1
Dim i As Integer
For Each row As DataRow In dt.Rows
    For i = 0 To lastColumnIndex
        ' Strip characters that would break the one-row-per-line,
        ' pipe-delimited layout.
        Dim output As String
        output = Replace(row(i).ToString, vbCr, String.Empty)
        output = Replace(output, vbLf, String.Empty)
        output = Replace(output, "|", String.Empty)
        batch.Append(output)
        If i < lastColumnIndex Then
            batch.Append("|")
        End If
    Next
    ' Terminate the row; without this all rows run together on one line.
    batch.Append(vbCrLf)
Next
按下面的方式实现 Mary 的建议,需要把查询返回的所有数据(1500 万个 DataRow 对象)全部加载到内存中。创建了几百万个 DataRow 之后,应用程序开始变慢;1 小时后只加载了 500 万行,进程内存占用已达 3 GB,而此时程序仍停留在 dt.Load(reader) 这一行。所以很遗憾,在处理如此大量的记录时,这种做法并不实用。
我将改用一个简单的 SSIS 包来完成这项工作。我不清楚它是如何做到的,但在我的环境中,它能在大约 10 分钟内把 1500 万行导出到文本文件。
''' <summary>
''' Loads the complete result set of the report's SQL into a DataTable and then
''' writes it to a pipe-delimited text file in batches of <c>batchSize</c> rows,
''' logging progress and timing per batch.
''' </summary>
''' <param name="report">Report whose <c>Sql</c> is executed and whose <c>ReportName</c> is used in log messages.</param>
''' <param name="filename">Target file; it is first written with the column header, then appended to.</param>
Private Sub GenerateTextSqlReportWithCurrent(report As TblreportEntity, filename As String)
    Const batchSize = 20000
    Dim encryption As New ClassEncrypt

    LogEvent($"INFO: Filling DataTable for report {report.ReportName}")
    Dim dt As DataTable = GetDataTable(report.Sql, IMOracle2.GetConnectString(My.Settings.DB_Instance, encryption.Decrypt(My.Settings.DB_UserID), encryption.Decrypt(My.Settings.DB_PWD)))
    LogEvent($"INFO: Finished Filling DataTable for report {report.ReportName}")

    ' Header line: column names joined with the data delimiter.
    ' NOTE(review): no line terminator is appended to the header here; confirm
    ' whether WriteToFile adds one, otherwise the first data row continues on
    ' the header line.
    Dim columnNames = From dc As DataColumn In dt.Columns
                      Select dc.ColumnName
    Dim strNames = String.Join("|", columnNames)
    WriteToFile(filename, strNames, False)

    Dim batch As New StringBuilder()
    Dim rowCount As Integer
    Dim lastColumnIndex As Integer = dt.Columns.Count - 1

    ' Must live outside the row loop: when it is re-initialised for every row,
    ' the "s since last" figure measures only the current row and is always ~0.
    Dim lastReport As DateTime = DateTime.Now()

    Dim i As Integer
    For Each row As DataRow In dt.Rows
        For i = 0 To lastColumnIndex
            ' Strip characters that would break the one-row-per-line,
            ' pipe-delimited layout.
            Dim output As String
            output = Replace(row(i).ToString, vbCr, String.Empty)
            output = Replace(output, vbLf, String.Empty)
            output = Replace(output, "|", String.Empty)
            batch.Append(output)
            If i < lastColumnIndex Then
                batch.Append("|")
            End If
        Next i
        batch.Append(vbCrLf)
        rowCount += 1

        ' Flush a full batch to disk and log how long formatting and writing took.
        If rowCount Mod batchSize = 0 Then
            Dim now = Date.Now
            Dim sinceLastSeconds = DateDiff(DateInterval.Second, lastReport, now)
            lastReport = now
            LogEvent($"INFO: Processing row {rowCount} {sinceLastSeconds}s since last")
            Dim fileWriteStart = Date.Now
            WriteToFile(filename, batch.ToString(), True)
            Dim fileWriteSeconds = DateDiff(DateInterval.Second, fileWriteStart, Date.Now)
            LogEvent($"INFO: Finished Writing another {batchSize} row(s) to file in {fileWriteSeconds}s for {report.ReportName}")
            batch.Clear()
        End If
    Next row

    ' Write whatever is left over from the last, partial batch.
    WriteToFile(filename, batch.ToString(), True)
    LogEvent($"INFO: Finished Writing last row(s) to {filename} for {report.ReportName}")
End Sub
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.