SQL Server 中按交易日期时间顺序计算每个客户的累计净流出

Question

I'm trying to compute the net out flow for each customer in order of transaction occurrence ([TRXN DATETIME] ascending). 我正在尝试按交易发生的顺序（[TRXN DATETIME] 升序）计算每个客户的净流出。 For this purpose I've written a loop-based query, but unfortunately it takes a very long time to run. 为此我编写了一个基于循环的查询，但不幸的是它的运行时间非常长。 20K records took 8 minutes, while I have to run it on 5 million records. 2 万条记录耗时 8 分钟，而我需要在 500 万条记录上运行它。

Explanation: for example, customer 923xxxxx307 has made 4 transactions; here is how the calculation should work for them. 说明：例如，客户 923xxxxx307 进行了 4 笔交易，下面说明计算应如何进行。

In the Sample Data table, the first transaction is an OUT FLOW in which he spent 26 rupees from his account, which is why NET OUT FLOW in the first row of the Required Output table is 26. 在示例数据表中，第一笔交易是 OUT FLOW，他从账户中支出了 26 卢比，因此 Required Output 表第一行的 NET OUT FLOW 为 26。
In the Sample Data table, the second transaction is an IN FLOW in which he deposited 60 rupees into his account. These 60 rupees should be parked in his own separate bucket and should not appear in the Required Output table. 在示例数据表中，第二笔交易是 IN FLOW，他向账户存入了 60 卢比。这 60 卢比应存放在他自己单独的“存储桶”中，不应出现在 Required Output 表中。
In the Sample Data table, the third transaction is an OUT FLOW in which he spent 62 rupees from his account. The script should first deduct this amount from the bucket maintained in point 2 (60 rupees), so the NET OUT FLOW column in the Required Output table should show 2, i.e. (62 - 60 = 2). 在示例数据表中，第三笔交易是 OUT FLOW，他从账户中支出了 62 卢比。脚本应先从第 2 点维护的存储桶（60 卢比）中扣除这笔金额，因此 Required Output 表的 NET OUT FLOW 列应显示 2，即（62 - 60 = 2）。 Further, his bucket should become 0 because it was fully consumed by the third transaction. 此外，他的存储桶应变为 0，因为它已被第三笔交易完全消耗。
In the Sample Data table, the fourth transaction is again an IN FLOW in which he deposited 360 rupees into his account, so his bucket should now hold 360 rupees, with no impact on the Required Output table. 在示例数据表中，第四笔交易又是 IN FLOW，他向账户存入了 360 卢比，因此他的存储桶现在应为 360 卢比，对 Required Output 表没有影响。 And so on. 依此类推。

This bucket will be used by the next OUT FLOW transaction; if the next transaction is again an IN FLOW, then the amount of the new transaction should be added to the existing 360 in the bucket. 此存储桶将用于下一笔 OUT FLOW 交易；如果下一笔交易仍是 IN FLOW，则应将新交易的金额加到存储桶中现有的 360 上。

I've also added complete SQL script with small sample data set for your test run.我还为您的测试运行添加了带有小样本数据集的完整 SQL 脚本。

Please help me with an efficient solution for this. 请帮我提供一个高效的解决方案。 I've attached input and output screenshots along with my code. 我附上了输入和输出的屏幕截图以及我的代码。

-- Build the sample data table used by the rest of the script ---
DROP TABLE IF EXISTS #TestData
CREATE TABLE #TestData
(
    [FINANCIAL ID]  BIGINT,         -- transaction identifier
    [DATE]          DATETIME,       -- calendar date of the transaction
    [TRXN DATETIME] DATETIME,       -- full transaction timestamp
    AMOUNT          FLOAT,          -- transaction amount
    [CUSTOMER NO]   VARCHAR(20),    -- masked customer number
    [PRODUCT NAME]  VARCHAR(10)     -- 'IN FLOW' or 'OUT FLOW'
)
GO

-- Load the 25 sample transactions (4 customers) in a single multi-row insert ---
INSERT INTO #TestData
    ([FINANCIAL ID], [DATE], [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME])
VALUES
    -- customer 923xxxxx307
    (9442001596, '2020-11-01', '2020-11-01 00:05:18', 26,    '923xxxxx307', 'OUT FLOW'),
    (9442094863, '2020-11-01', '2020-11-01 00:15:01', 60,    '923xxxxx307', 'IN FLOW'),
    (9442106611, '2020-11-01', '2020-11-01 00:16:26', 62,    '923xxxxx307', 'OUT FLOW'),
    (9442198611, '2020-11-01', '2020-11-01 00:30:35', 360,   '923xxxxx307', 'IN FLOW'),
    -- customer 923xxxxx864
    (9442227548, '2020-11-01', '2020-11-01 00:36:07', 15000, '923xxxxx864', 'IN FLOW'),
    (9442264685, '2020-11-01', '2020-11-01 00:44:03', 1660,  '923xxxxx864', 'IN FLOW'),
    (9442266137, '2020-11-01', '2020-11-01 00:44:24', 4540,  '923xxxxx864', 'OUT FLOW'),
    (9442358832, '2020-11-01', '2020-11-01 01:08:06', 200,   '923xxxxx864', 'OUT FLOW'),
    (9442434263, '2020-11-01', '2020-11-01 01:34:05', 190,   '923xxxxx864', 'OUT FLOW'),
    -- customer 923xxxxx562
    (9442517054, '2020-11-01', '2020-11-01 02:14:48', 5000,  '923xxxxx562', 'IN FLOW'),
    (9442525893, '2020-11-01', '2020-11-01 02:20:18', 5000,  '923xxxxx562', 'IN FLOW'),
    (9442533823, '2020-11-01', '2020-11-01 02:25:14', 10000, '923xxxxx562', 'IN FLOW'),
    (9442541534, '2020-11-01', '2020-11-01 02:30:25', 10000, '923xxxxx562', 'IN FLOW'),
    (9442545883, '2020-11-01', '2020-11-01 02:33:04', 25500, '923xxxxx562', 'OUT FLOW'),
    (9442552698, '2020-11-01', '2020-11-01 02:37:08', 5000,  '923xxxxx562', 'OUT FLOW'),
    -- customer 923xxxxx074
    (9443455472, '2020-11-01', '2020-11-01 07:45:10', 180,   '923xxxxx074', 'OUT FLOW'),
    (9443529884, '2020-11-01', '2020-11-01 07:54:41', 280,   '923xxxxx074', 'IN FLOW'),
    (9443657359, '2020-11-01', '2020-11-01 08:10:09', 100,   '923xxxxx074', 'IN FLOW'),
    (9443670261, '2020-11-01', '2020-11-01 08:11:34', 100,   '923xxxxx074', 'IN FLOW'),
    (9443682756, '2020-11-01', '2020-11-01 08:12:59', 100,   '923xxxxx074', 'IN FLOW'),
    (9443683147, '2020-11-01', '2020-11-01 08:13:01', 100,   '923xxxxx074', 'IN FLOW'),
    (9443872236, '2020-11-01', '2020-11-01 08:33:04', 100,   '923xxxxx074', 'IN FLOW'),
    (9443872617, '2020-11-01', '2020-11-01 08:33:06', 100,   '923xxxxx074', 'IN FLOW'),
    (9443886681, '2020-11-01', '2020-11-01 08:34:31', 100,   '923xxxxx074', 'IN FLOW'),
    (9444185688, '2020-11-01', '2020-11-01 09:02:11', 300,   '923xxxxx074', 'IN FLOW')

-- Ranked table with ROW NUMBERS ---
-- Copies #TestData into #TestData_Ranked and adds ROWNUMBER, a sequential
-- 1..n processing order for the row-by-row pass that follows.
-- Rows are sequenced by [TRXN DATETIME] because the requirement is to process
-- transactions in order of occurrence; [FINANCIAL ID] only breaks ties between
-- rows that share a timestamp so the numbering is deterministic.
DROP TABLE IF EXISTS #TestData_Ranked
SELECT
    td.[FINANCIAL ID],
    td.[DATE],
    td.[TRXN DATETIME],
    td.AMOUNT,
    td.[CUSTOMER NO],
    td.[PRODUCT NAME],
    ROWNUMBER = ROW_NUMBER() OVER (ORDER BY td.[TRXN DATETIME], td.[FINANCIAL ID])
INTO #TestData_Ranked
FROM #TestData td
GO

-- ROWNUMBER is unique by construction, so it can serve as the unique clustered
-- key that per-row lookups (WHERE ROWNUMBER = ...) seek on.
CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked (ROWNUMBER ASC)
GO


-- Main Query to get OUTPUT DATA table ---
-- Walks #TestData_Ranked one row at a time in ROWNUMBER order and maintains a
-- per-customer "bucket" of unspent IN FLOW money in #ibft_in:
--   * IN FLOW  : the amount is added to the customer's bucket; no output row.
--   * OUT FLOW : the bucket is consumed first. The part of the amount the
--                bucket cannot cover is written to #ibft_out as [NET OUT FLOW],
--                and the bucket is reduced (never below 0).
-- Rows with any other [PRODUCT NAME] are ignored.
DECLARE @start BIGINT   = (SELECT MIN(iios.ROWNUMBER) FROM #TestData_Ranked iios)
DECLARE @end BIGINT     = (SELECT MAX(iios.ROWNUMBER) FROM #TestData_Ranked iios)

-- Set to 1 to print the per-row trace (row number, amount, bucket before the row).
-- Off by default: three PRINTs per row are pure overhead on large inputs.
DECLARE @debug BIT = 0

DROP TABLE IF EXISTS #ibft_in
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT)

-- The bucket is looked up by customer on every iteration; without an index
-- each lookup has to scan the whole bucket table.
CREATE CLUSTERED INDEX [CI_ibft_in_customer] ON #ibft_in ([CUSTOMER NO])

-- [NET OUT FLOW] must be FLOAT like AMOUNT. A bare literal 0 would create an
-- INT column and silently drop the fractional part of every value inserted.
DROP TABLE IF EXISTS #ibft_out
SELECT TOP(0)
    iios.[FINANCIAL ID],
    iios.DATE,
    iios.[TRXN DATETIME],
    iios.AMOUNT,
    iios.[CUSTOMER NO],
    iios.[PRODUCT NAME],
    CAST(0 AS FLOAT) AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked iios

-- Per-row working variables, declared once outside the loop.
DECLARE @cur_FinancialId  BIGINT,
        @cur_TrxnDateTime DATETIME,
        @cur_Date         DATE,
        @cur_Amount       FLOAT,
        @cur_Customer_No  VARCHAR(20),
        @cur_ProductName  VARCHAR(10),
        @IN_IfExists      INT,      -- 1 when the customer already has a bucket row, else 0
        @IN_Amount        FLOAT,    -- bucket balance before the current row is applied
        @Remaining_Amount FLOAT     -- part of an OUT FLOW not covered by the bucket

WHILE (@start <= @end)
BEGIN

    -- A variable-assignment SELECT leaves variables untouched when no row
    -- matches, so clear the gating value first: a missing ROWNUMBER must not
    -- replay the previous row.
    SET @cur_ProductName = NULL

    -- Fetch the whole current row with a single seek on the clustered key.
    SELECT
        @cur_FinancialId  = r.[FINANCIAL ID],
        @cur_TrxnDateTime = r.[TRXN DATETIME],
        @cur_Date         = r.DATE,
        @cur_Amount       = r.AMOUNT,
        @cur_Customer_No  = r.[CUSTOMER NO],
        @cur_ProductName  = r.[PRODUCT NAME]
    FROM #TestData_Ranked r
    WHERE r.ROWNUMBER = @start

    -- One lookup yields both "does a bucket exist" and its balance. The
    -- aggregate always returns exactly one row, and a customer has at most
    -- one bucket row, so MAX() is simply that row's amount (or NULL -> 0).
    SELECT
        @IN_IfExists = COUNT(*),
        @IN_Amount   = ISNULL(MAX(ii.AMOUNT), 0)
    FROM #ibft_in ii
    WHERE ii.[CUSTOMER NO] = @cur_Customer_No

    SET @Remaining_Amount = ISNULL(CASE WHEN (@cur_Amount - @IN_Amount) < 0 THEN 0 ELSE (@cur_Amount - @IN_Amount) END, 0)

    IF @debug = 1
    BEGIN
        PRINT @start
        PRINT @cur_Amount
        PRINT @IN_Amount
    END

    IF (@cur_ProductName = 'IN FLOW')
    BEGIN
        IF (@IN_IfExists = 0)
            -- First deposit seen for this customer: open the bucket.
            INSERT INTO #ibft_in ([CUSTOMER NO], [TRXN DATETIME], AMOUNT)
            VALUES (@cur_Customer_No, @cur_TrxnDateTime, @cur_Amount)
        ELSE
            -- Later deposit: top up the existing bucket.
            UPDATE #ibft_in
            SET
                [TRXN DATETIME] = @cur_TrxnDateTime,
                AMOUNT          = @IN_Amount + @cur_Amount
            WHERE
                [CUSTOMER NO]   = @cur_Customer_No
    END
    ELSE IF (@cur_ProductName = 'OUT FLOW')
    BEGIN
        INSERT INTO #ibft_out
            ([FINANCIAL ID], DATE, [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW])
        VALUES
            (@cur_FinancialId, @cur_Date, @cur_TrxnDateTime, @cur_Amount, @cur_Customer_No, @cur_ProductName, @Remaining_Amount)

        -- Consume the bucket, flooring at 0. This is a no-op when the
        -- customer has no bucket row yet.
        UPDATE #ibft_in
        SET
            AMOUNT = CASE WHEN (@IN_Amount - @cur_Amount) < 0 THEN 0 ELSE (@IN_Amount - @cur_Amount) END
        WHERE
            [CUSTOMER NO] = @cur_Customer_No
    END

    SET @start = @start + 1
END
--------------------------------------------

SAMPLE DATA样本数据

REQUIRED OUTPUT需要 OUTPUT

Answer 1

EDIT :编辑：

This is technically still a RBAR query, just rewritten.这在技术上仍然是一个 RBAR 查询，只是重写了。 I don't have enough sample data to know what the performance is like, so obviously please run this against your 20k data set to compare speed, but this uses CTE instead of a CURSOR.我没有足够的样本数据来知道性能是什么样的，所以显然请对你的 20k 数据集运行它来比较速度，但这使用 CTE 而不是 CURSOR。 Note that this starts right at your -- Ranked table with ROW NUMBERS --- line:请注意，这从您的-- Ranked table with ROW NUMBERS ---行开始：

-- Ranked table with ROW NUMBERS ---
-- TrxTypeSEQ numbers each customer's transactions 1..n in chronological order.
-- [FINANCIAL ID] breaks ties between identical timestamps so the sequence
-- (and therefore the running bucket) is deterministic.
DROP TABLE IF EXISTS #TestData_Ranked
SELECT
    td.[FINANCIAL ID],
    td.[DATE],
    td.[TRXN DATETIME],
    td.AMOUNT,
    td.[CUSTOMER NO],
    td.[PRODUCT NAME],
    TrxTypeSEQ = ROW_NUMBER() OVER (PARTITION BY td.[CUSTOMER NO]
                                    ORDER BY td.[TRXN DATETIME], td.[FINANCIAL ID])
INTO #TestData_Ranked
FROM #TestData td
GO

-- ([CUSTOMER NO], TrxTypeSEQ) is unique by construction and is exactly the key
-- the recursive step joins on, so each recursion can seek a single row.
CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked ([CUSTOMER NO] ASC, TrxTypeSEQ ASC)
GO


DROP TABLE IF EXISTS #ibft_out;

-- SortedTrx walks every customer's transactions in sequence, carrying two
-- running values from one row to the next:
--   BUCKET : unspent IN FLOW money after the row (never below 0)
--   NETOUT : for an OUT FLOW row, the part of the amount the bucket could not
--            cover; always 0 for IN FLOW rows
WITH SortedTrx AS (
  -- Anchor: each customer's first transaction starts from an empty bucket.
  SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    TrxTypeSEQ,
    BUCKET = CASE WHEN [Product Name] = 'IN FLOW' THEN Amount ELSE 0 END,
    NETOUT = CASE WHEN [Product Name] = 'OUT FLOW' THEN Amount ELSE 0 END
  FROM #TestData_Ranked
  WHERE TrxTypeSEQ = 1

  UNION ALL

  -- Recursive step: apply transaction N+1 to the bucket left by transaction N.
  SELECT
    AllTrx.[Financial ID],
    AllTrx.[Date],
    AllTrx.[Trxn Datetime],
    AllTrx.[Amount],
    AllTrx.[Customer No],
    AllTrx.[Product Name],
    AllTrx.TrxTypeSEQ,
    BUCKET = CASE WHEN AllTrx.[Product Name] = 'IN FLOW' THEN SortedTrx.Bucket + AllTrx.Amount
                  ELSE
                      CASE WHEN AllTrx.[Amount] > SortedTrx.Bucket THEN 0 ELSE SortedTrx.Bucket - AllTrx.[Amount] END
             END,
    NETOUT = CASE WHEN AllTrx.[Product Name] = 'IN FLOW' THEN 0
                  ELSE
                      CASE WHEN AllTrx.[Amount] < SortedTrx.Bucket THEN 0 ELSE AllTrx.[Amount] - SortedTrx.Bucket END
             END
  FROM #TestData_Ranked AllTrx
  INNER JOIN SortedTrx
      ON  AllTrx.[Customer No] = SortedTrx.[Customer No]
      AND AllTrx.TrxTypeSEQ    = SortedTrx.TrxTypeSEQ + 1
)
SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
INTO #ibft_out
FROM SortedTrx
WHERE [Product Name] = 'OUT FLOW'
-- The recursion depth equals a customer's transaction count. The server's
-- default limit of 100 recursion levels would abort the statement for busy
-- customers, so the limit is lifted. Termination is still guaranteed because
-- every step advances TrxTypeSEQ by exactly one.
OPTION (MAXRECURSION 0);

-- Row order is only guaranteed by an ORDER BY on the query that reads the
-- table, not by the SELECT ... INTO that filled it.
SELECT
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
FROM #ibft_out
ORDER BY [Trxn Datetime], [Financial ID]

ORIGINAL :原件：

Rewriting your WHILE loop as a CURSOR cuts the number of times you query #TestData_Ranked per row from 6 to 1; 将您的 WHILE 循环改写为 CURSOR 后，每行查询 #TestData_Ranked 的次数将从 6 次减少到 1 次； and instead of querying #ibft_in 3 times on every iteration, you'll only query it once or twice, depending on the transaction type. 并且不再是每次迭代查询 #ibft_in 3 次，而是只查询一到两次，具体取决于交易类型。 I'm interested to know what kind of performance it gives you; 我很想知道它在您那里的性能如何； with Execution Plan Analysis enabled, the sample data ran in 7s versus over 40s using your code. 在启用执行计划分析的情况下，示例数据的运行时间为 7 秒，而使用您的代码则超过 40 秒。

Replace everything after your -- Main Query to get OUTPUT DATA table --- line with: 将您的 -- Main Query to get OUTPUT DATA table --- 行之后的所有内容替换为：

-- Create our useful tables for IN & OUT calculations
-- #ibft_in  : one row per customer holding the current bucket of unspent IN FLOW money
-- #ibft_out : one row per OUT FLOW transaction with its computed [NET OUT FLOW]
DROP TABLE IF EXISTS #ibft_in
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT)

-- The bucket is read or written by customer on every fetched row; without an
-- index each access has to scan the whole bucket table.
CREATE CLUSTERED INDEX [CI_ibft_in_customer] ON #ibft_in ([CUSTOMER NO])

-- [NET OUT FLOW] must be FLOAT like AMOUNT. A bare literal 0 would create an
-- INT column and silently drop the fractional part of every value inserted.
DROP TABLE IF EXISTS #ibft_out
SELECT TOP(0)
    iios.[FINANCIAL ID],
    iios.DATE,
    iios.[TRXN DATETIME],
    iios.AMOUNT,
    iios.[CUSTOMER NO],
    iios.[PRODUCT NAME],
    CAST(0 AS FLOAT) AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked iios

-- Declare these once, outside a loop, to cut down on wasted work
DECLARE @currRow BIGINT = 0,            -- BIGINT to match ROW_NUMBER()'s return type
        @currFinancialID BIGINT = 0,
        @currDate DATETIME,             -- source [DATE], carried through unchanged
        @currDateTime DATETIME,
        @currAmt FLOAT,
        @currCustomer VARCHAR(20),
        @currTrxType VARCHAR(10),
        @fundsAvailable FLOAT;          -- bucket balance before the current OUT FLOW

-- We want a one-way cursor as fast as we can
DECLARE trx_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        ROWNUMBER,
        [Financial ID],
        [Date],
        [Trxn Datetime],
        [Amount],
        [Customer No],
        [Product Name]
    FROM #TestData_Ranked
    ORDER BY ROWNUMBER ASC

OPEN trx_cursor
FETCH NEXT FROM trx_cursor INTO @currRow,
                                @currFinancialID,
                                @currDate,
                                @currDateTime,
                                @currAmt,
                                @currCustomer,
                                @currTrxType
WHILE @@FETCH_STATUS = 0
BEGIN
    -- If this is an IN transaction, INSERT or UPDATE our bucket of rupees
    IF @currTrxType = 'IN FLOW'
    BEGIN
        MERGE #ibft_in AS Tgt
        USING (SELECT @currCustomer AS cno, @currDateTime AS dt, @currAmt AS amt) AS Src
            ON Tgt.[CUSTOMER NO] = Src.cno
        WHEN NOT MATCHED BY TARGET THEN
            INSERT ([CUSTOMER NO], [TRXN DATETIME], AMOUNT)
            VALUES (Src.cno, Src.dt, Src.amt)
        WHEN MATCHED THEN
            UPDATE SET Tgt.[TRXN DATETIME] = Src.dt,
                       Tgt.AMOUNT          = Tgt.AMOUNT + Src.amt;
    END
    -- Only genuine OUT FLOW rows produce output; any other product name is
    -- skipped rather than being treated as an outflow.
    ELSE IF @currTrxType = 'OUT FLOW'
    BEGIN
        -- Bucket balance for this customer; 0 when no bucket row exists yet.
        SET @fundsAvailable = ISNULL((SELECT ii.AMOUNT FROM #ibft_in ii WHERE ii.[CUSTOMER NO] = @currCustomer), 0)

        -- NET OUT FLOW is the part of the amount the bucket cannot cover.
        INSERT INTO #ibft_out
            ([FINANCIAL ID], DATE, [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW])
        VALUES (
            @currFinancialID,
            @currDate,
            @currDateTime,
            @currAmt,
            @currCustomer,
            @currTrxType,
            ISNULL(CASE WHEN (@currAmt - @fundsAvailable) < 0 THEN 0 ELSE (@currAmt - @fundsAvailable) END, 0)
        )

        -- Consume the bucket, flooring at 0. This is a no-op when the
        -- customer has no bucket row yet.
        UPDATE #ibft_in
        SET AMOUNT = CASE WHEN (@fundsAvailable - @currAmt) < 0 THEN 0 ELSE (@fundsAvailable - @currAmt) END
        WHERE [CUSTOMER NO] = @currCustomer
    END

    FETCH NEXT FROM trx_cursor INTO @currRow,
                                    @currFinancialID,
                                    @currDate,
                                    @currDateTime,
                                    @currAmt,
                                    @currCustomer,
                                    @currTrxType
END
CLOSE trx_cursor
DEALLOCATE trx_cursor

-- Final bucket per customer, then the net out flow per OUT FLOW transaction.
SELECT [CUSTOMER NO], [TRXN DATETIME], AMOUNT
FROM #ibft_in
ORDER BY [CUSTOMER NO]

SELECT [FINANCIAL ID], DATE, [TRXN DATETIME], AMOUNT, [CUSTOMER NO], [PRODUCT NAME], [NET OUT FLOW]
FROM #ibft_out
ORDER BY [TRXN DATETIME], [FINANCIAL ID]

SQL 服务器中每个客户按照交易日期时间顺序的累计净流出流量

问题描述

1 个解决方案

解决方案1
1 已采纳 2020-12-01 01:31:10

SQL 服务器中每个客户按照交易日期时间顺序的累计净流出流量

问题描述

1 个解决方案

解决方案1 1 已采纳 2020-12-01 01:31:10

解决方案1
1 已采纳 2020-12-01 01:31:10