[英]Accumulative Net Out Flow as per the Transaction Datetime Sequence each Customer in SQL Server
我想按交易发生的先后顺序(DateTime 升序)计算每个客户的累计净流出金额。 为此我写了一个逐行循环的查询,但它处理起来非常慢:2 万条记录需要 8 分钟,而我必须在 500 万条记录上运行它。
解释:例如,客户 923xxxxx307 完成了 4 笔交易,这里解释它是如何工作的
这个余额桶将用于抵扣下一笔 OUT FLOW 交易;如果下一笔交易仍然是 IN FLOW,则应把新交易的金额累加到桶中现有的 360 上。
我还为您的测试运行添加了带有小样本数据集的完整 SQL 脚本。
请为此提供一些有效的解决方案。 我附上了 Input & Output 屏幕截图以及我的代码。
-- Sample-data staging table: one row per transaction ---
DROP TABLE IF EXISTS #TestData;
CREATE TABLE #TestData
(
    [FINANCIAL ID]  BIGINT,       -- identifier of the transaction row
    [DATE]          DATETIME,     -- date-only values in the sample rows
    [TRXN DATETIME] DATETIME,     -- full timestamp of the transaction
    AMOUNT          FLOAT,        -- transaction amount
    [CUSTOMER NO]   VARCHAR(20),  -- customer the transaction belongs to
    [PRODUCT NAME]  VARCHAR(10)   -- 'IN FLOW' or 'OUT FLOW' in the sample rows
);
GO
-- Load the 25 sample transactions (four customers) in a single statement ---
INSERT INTO #TestData
    ([FINANCIAL ID], [DATE], [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME])
VALUES
    -- customer 923xxxxx307
    (9442001596, '2020-11-01', '2020-11-01 00:05:18',    26, '923xxxxx307', 'OUT FLOW'),
    (9442094863, '2020-11-01', '2020-11-01 00:15:01',    60, '923xxxxx307', 'IN FLOW'),
    (9442106611, '2020-11-01', '2020-11-01 00:16:26',    62, '923xxxxx307', 'OUT FLOW'),
    (9442198611, '2020-11-01', '2020-11-01 00:30:35',   360, '923xxxxx307', 'IN FLOW'),
    -- customer 923xxxxx864
    (9442227548, '2020-11-01', '2020-11-01 00:36:07', 15000, '923xxxxx864', 'IN FLOW'),
    (9442264685, '2020-11-01', '2020-11-01 00:44:03',  1660, '923xxxxx864', 'IN FLOW'),
    (9442266137, '2020-11-01', '2020-11-01 00:44:24',  4540, '923xxxxx864', 'OUT FLOW'),
    (9442358832, '2020-11-01', '2020-11-01 01:08:06',   200, '923xxxxx864', 'OUT FLOW'),
    (9442434263, '2020-11-01', '2020-11-01 01:34:05',   190, '923xxxxx864', 'OUT FLOW'),
    -- customer 923xxxxx562
    (9442517054, '2020-11-01', '2020-11-01 02:14:48',  5000, '923xxxxx562', 'IN FLOW'),
    (9442525893, '2020-11-01', '2020-11-01 02:20:18',  5000, '923xxxxx562', 'IN FLOW'),
    (9442533823, '2020-11-01', '2020-11-01 02:25:14', 10000, '923xxxxx562', 'IN FLOW'),
    (9442541534, '2020-11-01', '2020-11-01 02:30:25', 10000, '923xxxxx562', 'IN FLOW'),
    (9442545883, '2020-11-01', '2020-11-01 02:33:04', 25500, '923xxxxx562', 'OUT FLOW'),
    (9442552698, '2020-11-01', '2020-11-01 02:37:08',  5000, '923xxxxx562', 'OUT FLOW'),
    -- customer 923xxxxx074
    (9443455472, '2020-11-01', '2020-11-01 07:45:10',   180, '923xxxxx074', 'OUT FLOW'),
    (9443529884, '2020-11-01', '2020-11-01 07:54:41',   280, '923xxxxx074', 'IN FLOW'),
    (9443657359, '2020-11-01', '2020-11-01 08:10:09',   100, '923xxxxx074', 'IN FLOW'),
    (9443670261, '2020-11-01', '2020-11-01 08:11:34',   100, '923xxxxx074', 'IN FLOW'),
    (9443682756, '2020-11-01', '2020-11-01 08:12:59',   100, '923xxxxx074', 'IN FLOW'),
    (9443683147, '2020-11-01', '2020-11-01 08:13:01',   100, '923xxxxx074', 'IN FLOW'),
    (9443872236, '2020-11-01', '2020-11-01 08:33:04',   100, '923xxxxx074', 'IN FLOW'),
    (9443872617, '2020-11-01', '2020-11-01 08:33:06',   100, '923xxxxx074', 'IN FLOW'),
    (9443886681, '2020-11-01', '2020-11-01 08:34:31',   100, '923xxxxx074', 'IN FLOW'),
    (9444185688, '2020-11-01', '2020-11-01 09:02:11',   300, '923xxxxx074', 'IN FLOW');
-- Ranked table with ROW NUMBERS ---
-- ROWNUMBER is the order in which the row-by-row pass visits transactions, so
-- it follows the transaction timestamp; [FINANCIAL ID] only breaks ties
-- between rows that share the same timestamp, keeping the numbering repeatable.
DROP TABLE IF EXISTS #TestData_Ranked;
SELECT
    td.*,
    ROWNUMBER = ROW_NUMBER() OVER (ORDER BY td.[TRXN DATETIME], td.[FINANCIAL ID])
INTO #TestData_Ranked
FROM #TestData AS td;
GO
-- ROW_NUMBER() never repeats a value, so the index can be declared unique;
-- it turns each "WHERE ROWNUMBER = @n" lookup into a single seek.
CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked (ROWNUMBER ASC);
GO
-- Main Query to get OUTPUT DATA table ---
-- Replays the transactions in ROWNUMBER order. #ibft_in holds one running
-- "bucket" row per customer: IN FLOW rows add to the bucket, OUT FLOW rows
-- drain it, and the part of an OUT FLOW that the bucket cannot cover is
-- written to #ibft_out as [NET OUT FLOW].
DECLARE @debug BIT = 0;  -- set to 1 to PRINT the per-row trace (slow on large inputs)

DECLARE @start BIGINT,
        @end   BIGINT;
SELECT @start = MIN(iios.ROWNUMBER),
       @end   = MAX(iios.ROWNUMBER)
FROM #TestData_Ranked AS iios;

DROP TABLE IF EXISTS #ibft_in;
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT);
-- The bucket is looked up by customer on every iteration; without an index
-- each lookup scans the whole table.
CREATE CLUSTERED INDEX [CI_IBFT_IN_CUSTOMER] ON #ibft_in ([CUSTOMER NO]);

DROP TABLE IF EXISTS #ibft_out;
-- CAST the placeholder: a bare literal 0 would make [NET OUT FLOW] an INT
-- column and drop the fractional part of every FLOAT net-out value stored in it.
SELECT TOP (0)
    iios.[FINANCIAL ID],
    iios.DATE,
    iios.[TRXN DATETIME],
    iios.AMOUNT,
    iios.[CUSTOMER NO],
    iios.[PRODUCT NAME],
    CAST(0 AS FLOAT) AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked AS iios;

-- Per-row working variables, declared once for the batch.
DECLARE @cur_FinancialId  BIGINT,
        @cur_TrxnDateTime DATETIME,
        @cur_Date         DATE,
        @cur_Amount       FLOAT,
        @cur_Customer_No  VARCHAR(20),
        @cur_ProductName  VARCHAR(10),
        @IN_IfExists      INT,
        @IN_Amount        FLOAT,
        @Remaining_Amount FLOAT;

WHILE (@start <= @end)
BEGIN
    -- Clear the type first so that a missing ROWNUMBER cannot replay the
    -- previous row: with a NULL type neither branch below fires.
    SET @cur_ProductName = NULL;

    -- One seek on the clustered index fetches every column of the current row.
    SELECT
        @cur_FinancialId  = r.[FINANCIAL ID],
        @cur_TrxnDateTime = r.[TRXN DATETIME],
        @cur_Date         = r.DATE,
        @cur_Amount       = r.AMOUNT,
        @cur_Customer_No  = r.[CUSTOMER NO],
        @cur_ProductName  = r.[PRODUCT NAME]
    FROM #TestData_Ranked AS r
    WHERE r.ROWNUMBER = @start;

    -- Current bucket for this customer; the defaults apply when no bucket row exists yet.
    SELECT @IN_IfExists = 0,
           @IN_Amount   = 0;
    SELECT @IN_IfExists = 1,
           @IN_Amount   = ISNULL(ii.AMOUNT, 0)
    FROM #ibft_in AS ii
    WHERE ii.[CUSTOMER NO] = @cur_Customer_No;

    IF @debug = 1
    BEGIN
        PRINT @start
        PRINT @cur_Amount
        PRINT @IN_Amount
    END

    IF @cur_ProductName = 'IN FLOW'
    BEGIN
        IF @IN_IfExists = 0
            -- First IN FLOW for this customer: open the bucket.
            INSERT INTO #ibft_in ([CUSTOMER NO], [TRXN DATETIME], AMOUNT)
            VALUES (@cur_Customer_No, @cur_TrxnDateTime, @cur_Amount);
        ELSE
            -- Later IN FLOW: add to whatever is still in the bucket.
            UPDATE #ibft_in
            SET [TRXN DATETIME] = @cur_TrxnDateTime,
                AMOUNT          = @IN_Amount + @cur_Amount
            WHERE [CUSTOMER NO] = @cur_Customer_No;
    END
    ELSE IF @cur_ProductName = 'OUT FLOW'
    BEGIN
        -- Part of this OUT FLOW not covered by the bucket, floored at zero.
        SET @Remaining_Amount = ISNULL(CASE WHEN (@cur_Amount - @IN_Amount) < 0 THEN 0
                                            ELSE (@cur_Amount - @IN_Amount) END, 0);

        INSERT INTO #ibft_out
            ([FINANCIAL ID], DATE, [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW])
        VALUES
            (@cur_FinancialId, @cur_Date, @cur_TrxnDateTime, @cur_Amount, @cur_Customer_No, @cur_ProductName, @Remaining_Amount);

        -- Drain the bucket by the OUT FLOW amount, never below zero.
        UPDATE #ibft_in
        SET AMOUNT = CASE WHEN (@IN_Amount - @cur_Amount) < 0 THEN 0
                          ELSE (@IN_Amount - @cur_Amount) END
        WHERE [CUSTOMER NO] = @cur_Customer_No;
    END

    SET @start = @start + 1;
END
--------------------------------------------
样本数据
需要 OUTPUT
编辑:
这在技术上仍然是一个 RBAR(逐行处理)查询,只是换了一种写法。 我没有足够的样本数据来判断它的性能,所以请在您的 2 万条数据集上运行它并比较速度;这个版本使用递归 CTE 而不是 CURSOR。 请注意,下面的代码从您脚本中 -- Ranked table with ROW NUMBERS --- 这一行开始:
-- Ranked table with ROW NUMBERS ---
-- TrxTypeSEQ numbers each customer's transactions 1, 2, 3, ... in the order
-- they happened. [FINANCIAL ID] breaks ties between rows that share a
-- timestamp so the sequence is the same on every run.
DROP TABLE IF EXISTS #TestData_Ranked;
SELECT
    td.*,
    TrxTypeSEQ = ROW_NUMBER() OVER (
        PARTITION BY td.[CUSTOMER NO]
        ORDER BY td.[TRXN DATETIME], td.[FINANCIAL ID]
    )
INTO #TestData_Ranked
FROM #TestData AS td;
GO
-- The recursive step looks up "same customer, next sequence number", so the
-- index leads with the customer; the pair (customer, sequence) is unique
-- because ROW_NUMBER() restarts per customer and never repeats within one.
CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked ([CUSTOMER NO] ASC, TrxTypeSEQ ASC);
GO
DROP TABLE IF EXISTS #ibft_out;
-- SortedTrx walks each customer's transactions in TrxTypeSEQ order and carries
-- two running values from one row to the next:
--   BUCKET : inflow still available to cover later OUT FLOW rows
--   NETOUT : part of the current OUT FLOW that BUCKET could not cover
WITH SortedTrx AS (
    -- Anchor: the first transaction of every customer starts from an empty bucket.
    SELECT
        [Financial ID],
        [Date],
        [Trxn Datetime],
        [Amount],
        [Customer No],
        [Product Name],
        TrxTypeSEQ,
        BUCKET = CASE WHEN [Product Name] = 'IN FLOW'  THEN Amount ELSE CAST(0 AS FLOAT) END,
        NETOUT = CASE WHEN [Product Name] = 'OUT FLOW' THEN Amount ELSE CAST(0 AS FLOAT) END
    FROM #TestData_Ranked
    WHERE TrxTypeSEQ = 1

    UNION ALL

    -- Recursive step: apply the customer's next transaction to the previous bucket.
    SELECT
        AllTrx.[Financial ID],
        AllTrx.[Date],
        AllTrx.[Trxn Datetime],
        AllTrx.[Amount],
        AllTrx.[Customer No],
        AllTrx.[Product Name],
        AllTrx.TrxTypeSEQ,
        -- IN FLOW tops the bucket up; OUT FLOW drains it, never below zero.
        BUCKET = CASE
                     WHEN AllTrx.[Product Name] = 'IN FLOW' THEN SortedTrx.BUCKET + AllTrx.Amount
                     WHEN AllTrx.[Amount] > SortedTrx.BUCKET THEN CAST(0 AS FLOAT)
                     ELSE SortedTrx.BUCKET - AllTrx.[Amount]
                 END,
        -- Only OUT FLOW rows produce a net outflow: the amount beyond the bucket.
        NETOUT = CASE
                     WHEN AllTrx.[Product Name] = 'IN FLOW' THEN CAST(0 AS FLOAT)
                     WHEN AllTrx.[Amount] < SortedTrx.BUCKET THEN CAST(0 AS FLOAT)
                     ELSE AllTrx.[Amount] - SortedTrx.BUCKET
                 END
    FROM #TestData_Ranked AS AllTrx
    INNER JOIN SortedTrx
        ON  AllTrx.[Customer No] = SortedTrx.[Customer No]
        AND AllTrx.TrxTypeSEQ    = SortedTrx.TrxTypeSEQ + 1
)
SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
INTO #ibft_out
FROM SortedTrx
WHERE [Product Name] = 'OUT FLOW'
-- One recursion level is used per transaction of a customer. The default cap
-- of 100 levels would abort the statement for any customer with more
-- transactions than that, so the cap is lifted.
OPTION (MAXRECURSION 0);

-- An ORDER BY on SELECT ... INTO does not fix the order rows are read back in,
-- so the ordering is applied where the result is returned.
SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
FROM #ibft_out
ORDER BY [Trxn Datetime], [Financial ID];
原始回答:
把您的 WHILE 循环改写为 CURSOR 之后,每处理一行对 #TestData_Ranked 的查询次数会从 6 次减少到 1 次;对 #ibft_in 也不再是每行查询 3 次,而是根据交易类型只查询一到两次。 我很想知道它在您那里的性能如何;在开启执行计划分析的情况下,示例数据用时 7 秒,而使用您的代码则用时 40 秒。
对应您脚本中 -- Main Query to get OUTPUT DATA table --- 这一行的位置:
-- Create our useful tables for IN & OUT calculations
-- #ibft_in  : one running "bucket" row per customer (inflow not yet used up)
-- #ibft_out : one row per OUT FLOW transaction with its net outflow
DROP TABLE IF EXISTS #ibft_in;
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT);
-- The bucket is looked up by customer for every transaction; without an index
-- each lookup scans the whole table.
CREATE CLUSTERED INDEX [CI_IBFT_IN_CUSTOMER] ON #ibft_in ([CUSTOMER NO]);

DROP TABLE IF EXISTS #ibft_out;
-- CAST the placeholder: a bare literal 0 would make [NET OUT FLOW] an INT
-- column and drop the fractional part of every FLOAT net-out value stored in it.
SELECT TOP (0)
    iios.[FINANCIAL ID],
    iios.DATE,
    iios.[TRXN DATETIME],
    iios.AMOUNT,
    iios.[CUSTOMER NO],
    iios.[PRODUCT NAME],
    CAST(0 AS FLOAT) AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked AS iios;

-- Declare these once, outside a loop, to cut down on wasted work
DECLARE @currRow         BIGINT = 0,   -- ROW_NUMBER() yields BIGINT
        @currFinancialID BIGINT = 0,
        @currDate        DATE,
        @currDateTime    DATETIME,
        @currAmt         FLOAT,
        @currCustomer    VARCHAR(20),
        @currTrxType     VARCHAR(10),
        @fundsAvailable  FLOAT;

-- We want a one-way cursor as fast as we can
DECLARE trx_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        ROWNUMBER,
        [Financial ID],
        [Date],
        [Trxn Datetime],
        [Amount],
        [Customer No],
        [Product Name]
    FROM #TestData_Ranked
    ORDER BY ROWNUMBER ASC;

OPEN trx_cursor;

FETCH NEXT FROM trx_cursor
INTO @currRow, @currFinancialID, @currDate, @currDateTime, @currAmt, @currCustomer, @currTrxType;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- If this is an IN transaction, INSERT or UPDATE our bucket of rupees
    IF @currTrxType = 'IN FLOW'
    BEGIN
        MERGE #ibft_in AS Tgt
        USING (SELECT @currCustomer AS cno, @currDateTime AS dt, @currAmt AS amt) AS Src
            ON Tgt.[CUSTOMER NO] = Src.cno
        WHEN NOT MATCHED BY TARGET THEN
            INSERT ([CUSTOMER NO], [TRXN DATETIME], AMOUNT)
            VALUES (Src.cno, Src.dt, Src.amt)
        WHEN MATCHED THEN
            UPDATE SET Tgt.[TRXN DATETIME] = Src.dt,
                       Tgt.AMOUNT          = Tgt.AMOUNT + Src.amt;
    END
    -- OTHERWISE, calculate our NET OUT
    ELSE
    BEGIN
        -- Bucket currently available to this customer (0 when none exists yet).
        SET @fundsAvailable = ISNULL((SELECT ISNULL(ii.AMOUNT, 0)
                                      FROM #ibft_in AS ii
                                      WHERE ii.[CUSTOMER NO] = @currCustomer), 0);

        -- [DATE] is taken from the source row rather than re-derived from the
        -- timestamp, so the output carries exactly what the input row held.
        INSERT INTO #ibft_out
            ([FINANCIAL ID], DATE, [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW])
        VALUES (
            @currFinancialID,
            @currDate,
            @currDateTime,
            @currAmt,
            @currCustomer,
            @currTrxType,
            -- Part of the outflow the bucket could not cover, floored at zero.
            ISNULL(CASE WHEN (@currAmt - @fundsAvailable) < 0 THEN 0
                        ELSE (@currAmt - @fundsAvailable) END, 0)
        );

        -- Drain the bucket by the outflow amount, never below zero.
        UPDATE #ibft_in
        SET AMOUNT = CASE WHEN (@fundsAvailable - @currAmt) < 0 THEN 0
                          ELSE (@fundsAvailable - @currAmt) END
        WHERE [CUSTOMER NO] = @currCustomer;
    END

    FETCH NEXT FROM trx_cursor
    INTO @currRow, @currFinancialID, @currDate, @currDateTime, @currAmt, @currCustomer, @currTrxType;
END

CLOSE trx_cursor;
DEALLOCATE trx_cursor;

-- Remaining bucket per customer, then the net outflow per OUT FLOW transaction.
SELECT [CUSTOMER NO], [TRXN DATETIME], AMOUNT
FROM #ibft_in;

SELECT [FINANCIAL ID], DATE, [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW]
FROM #ibft_out;
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.