[英]Accumulative Net Out Flow as per the Transaction Datetime Sequence for each Customer in SQL Server
I'm trying to get the net outflow for each customer, following the order in which the transactions occurred (DateTime ASC).我正在尝试根据交易发生的顺序(DateTime ASC)为每个客户获取净流出流量。 For this purpose I've created a loop query, but unfortunately it takes a very long time to process.
为此,我创建了一个循环查询,但不幸的是它需要大量时间来处理。 20K records took 8 minutes, whereas I have to run it on 5 million records.
20K 条记录需要 8 分钟,而我必须在 500 万条记录上运行它。
Explanation: For example, customer 923xxxxx307 has made 4 transactions; here is an explanation of how it would work:解释:例如,客户 923xxxxx307 完成了 4 笔交易,这里解释它是如何工作的
This bucket would be used by the next OUT FLOW transaction; or, if the next transaction is again an IN FLOW, then the amount of the new transaction should be added to the existing 360 in the bucket.此存储桶将用于下一个 OUT FLOW 事务,或者如果下一个事务再次 IN FLOW,则应在存储桶中的现有 360 中添加新事务的数量
I've also added complete SQL script with small sample data set for your test run.我还为您的测试运行添加了带有小样本数据集的完整 SQL 脚本。
Please help me with an efficient solution for this.请为此提供一些有效的解决方案。 I've attached Input & Output screenshots along with my code.
我附上了 Input & Output 屏幕截图以及我的代码。
-- Sample data: one row per transaction.
-- [PRODUCT NAME] carries the direction of the transaction
-- ('IN FLOW' or 'OUT FLOW'); AMOUNT is the transaction amount.
DROP TABLE IF EXISTS #TestData;
CREATE TABLE #TestData
(
    [FINANCIAL ID]  BIGINT,
    [DATE]          DATETIME,
    [TRXN DATETIME] DATETIME,
    AMOUNT          FLOAT,
    [CUSTOMER NO]   VARCHAR(20),
    [PRODUCT NAME]  VARCHAR(10)
);
GO
-- Load the 25 sample transactions (4 customers) in a single statement.
INSERT INTO #TestData
    ([FINANCIAL ID], [DATE], [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME])
VALUES
    -- customer 923xxxxx307
    (9442001596, '2020-11-01', '2020-11-01 00:05:18',    26, '923xxxxx307', 'OUT FLOW'),
    (9442094863, '2020-11-01', '2020-11-01 00:15:01',    60, '923xxxxx307', 'IN FLOW'),
    (9442106611, '2020-11-01', '2020-11-01 00:16:26',    62, '923xxxxx307', 'OUT FLOW'),
    (9442198611, '2020-11-01', '2020-11-01 00:30:35',   360, '923xxxxx307', 'IN FLOW'),
    -- customer 923xxxxx864
    (9442227548, '2020-11-01', '2020-11-01 00:36:07', 15000, '923xxxxx864', 'IN FLOW'),
    (9442264685, '2020-11-01', '2020-11-01 00:44:03',  1660, '923xxxxx864', 'IN FLOW'),
    (9442266137, '2020-11-01', '2020-11-01 00:44:24',  4540, '923xxxxx864', 'OUT FLOW'),
    (9442358832, '2020-11-01', '2020-11-01 01:08:06',   200, '923xxxxx864', 'OUT FLOW'),
    (9442434263, '2020-11-01', '2020-11-01 01:34:05',   190, '923xxxxx864', 'OUT FLOW'),
    -- customer 923xxxxx562
    (9442517054, '2020-11-01', '2020-11-01 02:14:48',  5000, '923xxxxx562', 'IN FLOW'),
    (9442525893, '2020-11-01', '2020-11-01 02:20:18',  5000, '923xxxxx562', 'IN FLOW'),
    (9442533823, '2020-11-01', '2020-11-01 02:25:14', 10000, '923xxxxx562', 'IN FLOW'),
    (9442541534, '2020-11-01', '2020-11-01 02:30:25', 10000, '923xxxxx562', 'IN FLOW'),
    (9442545883, '2020-11-01', '2020-11-01 02:33:04', 25500, '923xxxxx562', 'OUT FLOW'),
    (9442552698, '2020-11-01', '2020-11-01 02:37:08',  5000, '923xxxxx562', 'OUT FLOW'),
    -- customer 923xxxxx074
    (9443455472, '2020-11-01', '2020-11-01 07:45:10',   180, '923xxxxx074', 'OUT FLOW'),
    (9443529884, '2020-11-01', '2020-11-01 07:54:41',   280, '923xxxxx074', 'IN FLOW'),
    (9443657359, '2020-11-01', '2020-11-01 08:10:09',   100, '923xxxxx074', 'IN FLOW'),
    (9443670261, '2020-11-01', '2020-11-01 08:11:34',   100, '923xxxxx074', 'IN FLOW'),
    (9443682756, '2020-11-01', '2020-11-01 08:12:59',   100, '923xxxxx074', 'IN FLOW'),
    (9443683147, '2020-11-01', '2020-11-01 08:13:01',   100, '923xxxxx074', 'IN FLOW'),
    (9443872236, '2020-11-01', '2020-11-01 08:33:04',   100, '923xxxxx074', 'IN FLOW'),
    (9443872617, '2020-11-01', '2020-11-01 08:33:06',   100, '923xxxxx074', 'IN FLOW'),
    (9443886681, '2020-11-01', '2020-11-01 08:34:31',   100, '923xxxxx074', 'IN FLOW'),
    (9444185688, '2020-11-01', '2020-11-01 09:02:11',   300, '923xxxxx074', 'IN FLOW');
-- Ranked table with ROW NUMBERS ---
-- Copies #TestData and adds ROWNUMBER, a gap-free 1..N sequence that the
-- row-by-row processing walks from MIN(ROWNUMBER) to MAX(ROWNUMBER).
-- The sequence follows [TRXN DATETIME], because the bucket logic must see the
-- transactions in the order they occurred; [FINANCIAL ID] only breaks ties so
-- that the numbering is deterministic between runs.
DROP TABLE IF EXISTS #TestData_Ranked;
SELECT
    td.*,
    ROWNUMBER = ROW_NUMBER() OVER (ORDER BY td.[TRXN DATETIME], td.[FINANCIAL ID])
INTO #TestData_Ranked
FROM #TestData AS td;
GO
-- ROW_NUMBER() values are unique, so the index can be declared UNIQUE;
-- it turns every "WHERE ROWNUMBER = @n" lookup into a single seek.
CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked (ROWNUMBER ASC);
GO
-- Main Query to get OUTPUT DATA table ---
-- Walks #TestData_Ranked in ROWNUMBER order and keeps, per customer, a
-- "bucket" of received funds in #ibft_in:
--   * IN FLOW  : the amount is added to the customer's bucket.
--   * OUT FLOW : the bucket is drained first; the part of the amount that the
--                bucket cannot cover is written to #ibft_out as [NET OUT FLOW],
--                and the bucket never drops below 0.
-- Rows with any other [PRODUCT NAME] are skipped.
DECLARE @start BIGINT = (SELECT MIN(iios.ROWNUMBER) FROM #TestData_Ranked iios);
DECLARE @end   BIGINT = (SELECT MAX(iios.ROWNUMBER) FROM #TestData_Ranked iios);

DROP TABLE IF EXISTS #ibft_in;
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT);
-- The bucket is looked up by customer on every iteration; without an index
-- each lookup scans the whole table.
CREATE CLUSTERED INDEX [CI_ibft_in_customer] ON #ibft_in ([CUSTOMER NO]);

DROP TABLE IF EXISTS #ibft_out;
-- [NET OUT FLOW] must be FLOAT like AMOUNT: a bare literal 0 would create an
-- INT column and silently truncate fractional amounts on insert.
SELECT TOP (0)
    iios.[FINANCIAL ID],
    iios.[DATE],
    iios.[TRXN DATETIME],
    iios.AMOUNT,
    iios.[CUSTOMER NO],
    iios.[PRODUCT NAME],
    CAST(0 AS FLOAT) AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked iios;

-- Working variables for the row currently being processed.
DECLARE @cur_FinancialId  BIGINT,
        @cur_TrxnDateTime DATETIME,
        @cur_Date         DATE,
        @cur_Amount       FLOAT,
        @cur_Customer_No  VARCHAR(20),
        @cur_ProductName  VARCHAR(10),
        @IN_IfExists      INT,    -- number of bucket rows found for the customer
        @IN_Amount        FLOAT,  -- bucket balance before this transaction
        @Remaining_Amount FLOAT;  -- part of an outflow not covered by the bucket

WHILE (@start <= @end)
BEGIN
    -- Read the current transaction with a single seek on the clustered index.
    SELECT
        @cur_FinancialId  = tr.[FINANCIAL ID],
        @cur_TrxnDateTime = tr.[TRXN DATETIME],
        @cur_Date         = tr.[DATE],
        @cur_Amount       = tr.AMOUNT,
        @cur_Customer_No  = tr.[CUSTOMER NO],
        @cur_ProductName  = tr.[PRODUCT NAME]
    FROM #TestData_Ranked AS tr
    WHERE tr.ROWNUMBER = @start;

    IF @@ROWCOUNT = 0
    BEGIN
        -- No row carries this number: the variables would still hold the
        -- previous row's values, so skip instead of processing it twice.
        SET @start = @start + 1;
        CONTINUE;
    END;

    -- Fetch the customer's bucket; @@ROWCOUNT tells whether a bucket row exists.
    SET @IN_Amount = NULL;
    SELECT @IN_Amount = ii.AMOUNT
    FROM #ibft_in AS ii
    WHERE ii.[CUSTOMER NO] = @cur_Customer_No;
    SET @IN_IfExists = @@ROWCOUNT;
    SET @IN_Amount = ISNULL(@IN_Amount, 0);

    SET @Remaining_Amount = ISNULL(CASE WHEN (@cur_Amount - @IN_Amount) < 0 THEN 0
                                        ELSE (@cur_Amount - @IN_Amount)
                                   END, 0);

    IF (@cur_ProductName = 'IN FLOW')
    BEGIN
        IF (@IN_IfExists = 0)
            -- First inflow seen for this customer: open the bucket.
            INSERT INTO #ibft_in ([CUSTOMER NO], [TRXN DATETIME], AMOUNT)
            VALUES (@cur_Customer_No, @cur_TrxnDateTime, @cur_Amount);
        ELSE
            -- Top up the existing bucket and remember the latest inflow time.
            UPDATE #ibft_in
            SET [TRXN DATETIME] = @cur_TrxnDateTime,
                AMOUNT = @IN_Amount + @cur_Amount
            WHERE [CUSTOMER NO] = @cur_Customer_No;
    END
    ELSE IF (@cur_ProductName = 'OUT FLOW')
    BEGIN
        INSERT INTO #ibft_out
            ([FINANCIAL ID], [DATE], [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW])
        VALUES
            (@cur_FinancialId, @cur_Date, @cur_TrxnDateTime, @cur_Amount, @cur_Customer_No, @cur_ProductName, @Remaining_Amount);

        -- Drain the bucket, flooring it at 0 (affects no row when the
        -- customer has no bucket yet).
        UPDATE #ibft_in
        SET AMOUNT = CASE WHEN (@IN_Amount - @cur_Amount) < 0 THEN 0
                          ELSE (@IN_Amount - @cur_Amount)
                     END
        WHERE [CUSTOMER NO] = @cur_Customer_No;
    END;

    SET @start = @start + 1;
END;
--------------------------------------------
SAMPLE DATA样本数据
REQUIRED OUTPUT需要 OUTPUT
EDIT :编辑:
This is technically still a RBAR query, just rewritten.这在技术上仍然是一个 RBAR 查询,只是重写了。 I don't have enough sample data to know what the performance is like, so obviously please run this against your 20k data set to compare speed, but this uses CTE instead of a CURSOR.
我没有足够的样本数据来知道性能是什么样的,所以显然请对你的 20k 数据集运行它来比较速度,但这使用 CTE 而不是 CURSOR。 Note that this starts right at your
-- Ranked table with ROW NUMBERS ---
line:请注意,这从您的
-- Ranked table with ROW NUMBERS ---
行开始:
-- Ranked table with ROW NUMBERS ---
-- Copies #TestData and adds TrxTypeSEQ, which numbers each customer's
-- transactions 1, 2, 3, ... in the order they occurred. [FINANCIAL ID] breaks
-- ties between transactions that share a [TRXN DATETIME] so the numbering is
-- deterministic between runs.
DROP TABLE IF EXISTS #TestData_Ranked;
SELECT
    td.*,
    TrxTypeSEQ = ROW_NUMBER() OVER (
                     PARTITION BY td.[CUSTOMER NO]
                     ORDER BY td.[TRXN DATETIME], td.[FINANCIAL ID]
                 )
INTO #TestData_Ranked
FROM #TestData AS td;
GO
-- (TrxTypeSEQ, [CUSTOMER NO]) is unique by construction. Leading with
-- TrxTypeSEQ serves the "TrxTypeSEQ = 1" filter, and the full key serves a
-- join on customer + next sequence number with a single-row seek.
CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked (TrxTypeSEQ ASC, [CUSTOMER NO] ASC);
GO
DROP TABLE IF EXISTS #ibft_out;
-- SortedTrx walks each customer's transactions in TrxTypeSEQ order and carries
-- two running values from one row to the next:
--   BUCKET : funds received and not yet spent (never below 0)
--   NETOUT : for an outflow, the part of the amount the bucket could not cover
-- Only the OUT FLOW rows are kept in #ibft_out.
WITH SortedTrx AS (
    -- Anchor: each customer's first transaction starts from an empty bucket.
    SELECT
        [Financial ID],
        [Date],
        [Trxn Datetime],
        [Amount],
        [Customer No],
        [Product Name],
        TrxTypeSEQ,
        BUCKET = CASE WHEN [Product Name] = 'IN FLOW' THEN Amount ELSE 0 END,
        NETOUT = CASE WHEN [Product Name] = 'OUT FLOW' THEN Amount ELSE 0 END
    FROM #TestData_Ranked
    WHERE TrxTypeSEQ = 1

    UNION ALL

    -- Recursive step: apply the customer's next transaction to the bucket
    -- left behind by the previous one.
    SELECT
        AllTrx.[Financial ID],
        AllTrx.[Date],
        AllTrx.[Trxn Datetime],
        AllTrx.[Amount],
        AllTrx.[Customer No],
        AllTrx.[Product Name],
        AllTrx.TrxTypeSEQ,
        BUCKET = CASE
                     WHEN AllTrx.[Product Name] = 'IN FLOW' THEN SortedTrx.Bucket + AllTrx.Amount
                     WHEN AllTrx.[Amount] > SortedTrx.Bucket THEN 0
                     ELSE SortedTrx.Bucket - AllTrx.[Amount]
                 END,
        NETOUT = CASE
                     WHEN AllTrx.[Product Name] = 'IN FLOW' THEN 0
                     WHEN AllTrx.[Amount] < SortedTrx.Bucket THEN 0
                     ELSE AllTrx.[Amount] - SortedTrx.Bucket
                 END
    FROM #TestData_Ranked AS AllTrx
    INNER JOIN SortedTrx
        ON AllTrx.[customer no] = SortedTrx.[customer no]
       AND SortedTrx.TrxTypeSEQ + 1 = AllTrx.TrxTypeSEQ
)
SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
INTO #ibft_out
FROM SortedTrx
WHERE [Product Name] = 'OUT FLOW'
-- The recursion depth equals the largest number of transactions of any one
-- customer; the default limit of 100 would abort the query beyond that.
OPTION (MAXRECURSION 0);

-- SELECT ... INTO does not preserve an ORDER BY for later reads, so the
-- ordering is applied where the rows are actually returned.
SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
FROM #ibft_out
ORDER BY [Trxn Datetime], [Financial ID];
ORIGINAL :原件:
To rewrite your WHILE
loop into a CURSOR
will cut down on the number of times you're querying #TestData_Ranked
from 6 to 1;将您的
WHILE
循环重写为CURSOR
将减少您查询#TestData_Ranked
的次数从 6 到 1; instead of querying #ibft_in
3x every single time, you'll only query it once or twice, depending on the transaction type.而不是每次查询
#ibft_in
3x,您只需查询一次或两次,具体取决于事务类型。 I'm interested to know what kind of performance it gives you;我很想知道它给你什么样的表现; the sample data ran in 7s with Execution Plan Analysis vs over 40s using your code.
使用执行计划分析的示例数据在 7 秒内运行,而使用您的代码则在 40 秒内运行。
Replace everything after your -- Main Query to get OUTPUT DATA table ---
line with:在您的
-- Main Query to get OUTPUT DATA table ---
符合:
-- Create our useful tables for IN & OUT calculations
-- #ibft_in  : one "bucket" row per customer holding received, unspent funds.
-- #ibft_out : one row per OUT FLOW transaction with its [NET OUT FLOW], i.e.
--             the part of the amount the customer's bucket could not cover.
DROP TABLE IF EXISTS #ibft_in;
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT);
-- The bucket is looked up by customer for every transaction; without an index
-- each lookup scans the whole table.
CREATE CLUSTERED INDEX [CI_ibft_in_customer] ON #ibft_in ([CUSTOMER NO]);

DROP TABLE IF EXISTS #ibft_out;
-- [NET OUT FLOW] must be FLOAT like AMOUNT: a bare literal 0 would create an
-- INT column and silently truncate fractional amounts on insert.
SELECT TOP (0)
    iios.[FINANCIAL ID],
    iios.[DATE],
    iios.[TRXN DATETIME],
    iios.AMOUNT,
    iios.[CUSTOMER NO],
    iios.[PRODUCT NAME],
    CAST(0 AS FLOAT) AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked iios;

-- Declare these once, outside a loop, to cut down on wasted work
DECLARE @currRow         BIGINT = 0,   -- ROWNUMBER is BIGINT (ROW_NUMBER())
        @currFinancialID BIGINT = 0,
        @currDate        DATE,
        @currDateTime    DATETIME,
        @currAmt         FLOAT,
        @currCustomer    VARCHAR(20),
        @currTrxType     VARCHAR(10),
        @fundsAvailable  FLOAT;

-- We want a one-way cursor as fast as we can
DECLARE trx_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        ROWNUMBER,
        [Financial ID],
        CAST([Date] AS DATE),   -- the row's own date, not one derived from the datetime
        [Trxn Datetime],
        [Amount],
        [Customer No],
        [Product Name]
    FROM #TestData_Ranked
    ORDER BY ROWNUMBER ASC;

OPEN trx_cursor;
FETCH NEXT FROM trx_cursor INTO
    @currRow, @currFinancialID, @currDate, @currDateTime, @currAmt, @currCustomer, @currTrxType;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- If this is an IN transaction, INSERT or UPDATE our bucket of rupees
    IF @currTrxType = 'IN FLOW'
    BEGIN
        MERGE #ibft_in AS Tgt
        USING (SELECT @currCustomer AS cno, @currDateTime AS dt, @currAmt AS amt) AS Src
            ON Tgt.[CUSTOMER NO] = Src.cno
        WHEN MATCHED THEN
            UPDATE
            SET Tgt.[TRXN DATETIME] = Src.dt,
                Tgt.AMOUNT = ISNULL(Tgt.AMOUNT, 0) + Src.amt
        WHEN NOT MATCHED BY TARGET THEN
            INSERT ([CUSTOMER NO], [TRXN DATETIME], AMOUNT)
            VALUES (Src.cno, Src.dt, Src.amt);
    END
    -- An OUT transaction: calculate our NET OUT and drain the bucket.
    -- Any other transaction type is skipped rather than treated as an outflow.
    ELSE IF @currTrxType = 'OUT FLOW'
    BEGIN
        SELECT @fundsAvailable = ISNULL((SELECT ISNULL(ii.AMOUNT, 0)
                                         FROM #ibft_in ii
                                         WHERE ii.[CUSTOMER NO] = @currCustomer), 0);

        INSERT INTO #ibft_out
            ([FINANCIAL ID], [DATE], [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW])
        VALUES (
            @currFinancialID,
            @currDate,
            @currDateTime,
            @currAmt,
            @currCustomer,
            @currTrxType,
            ISNULL(CASE WHEN (@currAmt - @fundsAvailable) < 0 THEN 0
                        ELSE (@currAmt - @fundsAvailable)
                   END, 0)
        );

        -- Floor the bucket at 0 (affects no row when the customer has no bucket yet).
        UPDATE #ibft_in
        SET AMOUNT = CASE WHEN (@fundsAvailable - @currAmt) < 0 THEN 0
                          ELSE (@fundsAvailable - @currAmt)
                     END
        WHERE [CUSTOMER NO] = @currCustomer;
    END;

    FETCH NEXT FROM trx_cursor INTO
        @currRow, @currFinancialID, @currDate, @currDateTime, @currAmt, @currCustomer, @currTrxType;
END;

CLOSE trx_cursor;
DEALLOCATE trx_cursor;

SELECT * FROM #ibft_in;
SELECT * FROM #ibft_out;
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.