简体   繁体   中英

Accumulative Net Out Flow as per the Transaction Datetime Sequence each Customer in SQL Server

I'm trying to get Net Out Flows for each customer as per sequence of transaction occurrence (DateTime ASC). For this purpose I've created a loop query but unfortunately it is taking heavy amount of time to process. 20K records took 8 minutes while I've to run it on 5 million of records.

Explanation: For example, Customer 923xxxxx307 has done 4 transaction, here is explanation that how it would work

  1. In Sample Data table, first transaction is OUT FLOW where he used 26 rupees out of account which is why in Required Output table NET OUT FLOW in first row is 26
  2. In Sample Data table, Second transaction is IN FLOW where he deposited 60 rupees in his account, these 60 rupees should be parked in his own separate bucket and this should not reflect in Required Out table
  3. In Sample Data table, third transaction is OUT FLOW where he used 62 rupees out of his account script should minus this amount of 62 out of his bucket maintained in point 2 so in Required Output table NET OUT FLOW column should show 2 ie (62-60 = 2). Further, his bucket should become 0 because it was fully consumed in 3rd transaction
  4. In Sample Data table, forth transaction is again IN FLOW where he deposited 360 rupees in his account, so his bucket should again show 360 rupees, no impact on Required Output Table. and so on.

This bucket would be used in next OUT FLOW transaction or if next transaction is again IN FLOW than amount of new transaction should be added in existing 360 in bucket

I've also added complete SQL script with small sample data set for your test run.

Please help me with some efficient solution for this. I'm attached Input & Output screenshot along with my code.

-- Creating SAMPLE DATA Table ---
DROP TABLE IF EXISTS #TestData
CREATE TABLE #TestData
(
    [FINANCIAL ID]  BIGINT,
    [DATE]          DATETIME,
    [TRXN DATETIME] DATETIME,
    AMOUNT          FLOAT,
    [CUSTOMER NO]   VARCHAR(20),
    [PRODUCT NAME]  VARCHAR(10)
)
GO

-- Inserting Sample Date in above table ---
INSERT INTO #TestData
VALUES (9442001596,'2020-11-01','2020-11-01 00:05:18',26,'923xxxxx307','OUT FLOW')

INSERT INTO #TestData
VALUES (9442094863,'2020-11-01','2020-11-01 00:15:01',60,'923xxxxx307','IN FLOW')

INSERT INTO #TestData
VALUES (9442106611,'2020-11-01','2020-11-01 00:16:26',62,'923xxxxx307','OUT FLOW')

INSERT INTO #TestData
VALUES (9442198611,'2020-11-01','2020-11-01 00:30:35',360,'923xxxxx307','IN FLOW')

INSERT INTO #TestData
VALUES (9442227548,'2020-11-01','2020-11-01 00:36:07',15000,'923xxxxx864','IN FLOW')

INSERT INTO #TestData
VALUES (9442264685,'2020-11-01','2020-11-01 00:44:03',1660,'923xxxxx864','IN FLOW')

INSERT INTO #TestData
VALUES (9442266137,'2020-11-01','2020-11-01 00:44:24',4540,'923xxxxx864','OUT FLOW')

INSERT INTO #TestData
VALUES (9442358832,'2020-11-01','2020-11-01 01:08:06',200,'923xxxxx864','OUT FLOW')

INSERT INTO #TestData
VALUES (9442434263,'2020-11-01','2020-11-01 01:34:05',190,'923xxxxx864','OUT FLOW')

INSERT INTO #TestData
VALUES (9442517054,'2020-11-01','2020-11-01 02:14:48',5000,'923xxxxx562','IN FLOW')

INSERT INTO #TestData
VALUES (9442525893,'2020-11-01','2020-11-01 02:20:18',5000,'923xxxxx562','IN FLOW')

INSERT INTO #TestData
VALUES (9442533823,'2020-11-01','2020-11-01 02:25:14',10000,'923xxxxx562','IN FLOW')

INSERT INTO #TestData
VALUES (9442541534,'2020-11-01','2020-11-01 02:30:25',10000,'923xxxxx562','IN FLOW')

INSERT INTO #TestData
VALUES (9442545883,'2020-11-01','2020-11-01 02:33:04',25500,'923xxxxx562','OUT FLOW')

INSERT INTO #TestData
VALUES (9442552698,'2020-11-01','2020-11-01 02:37:08',5000,'923xxxxx562','OUT FLOW')

INSERT INTO #TestData
VALUES (9443455472,'2020-11-01','2020-11-01 07:45:10',180,'923xxxxx074','OUT FLOW')

INSERT INTO #TestData
VALUES (9443529884,'2020-11-01','2020-11-01 07:54:41',280,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443657359,'2020-11-01','2020-11-01 08:10:09',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443670261,'2020-11-01','2020-11-01 08:11:34',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443682756,'2020-11-01','2020-11-01 08:12:59',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443683147,'2020-11-01','2020-11-01 08:13:01',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443872236,'2020-11-01','2020-11-01 08:33:04',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443872617,'2020-11-01','2020-11-01 08:33:06',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9443886681,'2020-11-01','2020-11-01 08:34:31',100,'923xxxxx074','IN FLOW')

INSERT INTO #TestData
VALUES (9444185688,'2020-11-01','2020-11-01 09:02:11',300,'923xxxxx074','IN FLOW')

-- Ranked table with ROW NUMBERS --- 
DROP TABLE IF EXISTS #TestData_Ranked
SELECT *, ROWNUMBER = ROW_NUMBER() OVER (ORDER BY td.[FINANCIAL ID])
INTO #TestData_Ranked
FROM #TestData td
GO

CREATE UNIQUE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked (ROWNUMBER ASC)
GO


-- Main Query to get OUTPUT DATA table ---
DECLARE @start BIGINT   = (SELECT MIN(iios.ROWNUMBER) FROM #TestData_Ranked iios)
DECLARE @end BIGINT     = (SELECT MAX(iios.ROWNUMBER) FROM #TestData_Ranked iios)

DROP TABLE IF EXISTS #ibft_in
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT)

DROP TABLE IF EXISTS #ibft_out
SELECT TOP(0) iios.[FINANCIAL ID], iios.DATE, iios.[TRXN DATETIME], iios.AMOUNT, iios.[CUSTOMER NO], iios.[PRODUCT NAME], 0 AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked iios


WHILE (@start <= @end)
BEGIN

    DECLARE @cur_FinancialId BIGINT = (SELECT [FINANCIAL ID] FROM #TestData_Ranked WHERE ROWNUMBER = @start)
    DECLARE @cur_TrxnDateTime DATETIME = (SELECT [TRXN DATETIME] FROM #TestData_Ranked WHERE ROWNUMBER = @start)
    DECLARE @cur_Date DATE = (SELECT DATE FROM #TestData_Ranked WHERE ROWNUMBER = @start)
    DECLARE @cur_Amount FLOAT = (SELECT AMOUNT FROM #TestData_Ranked WHERE ROWNUMBER = @start)
    DECLARE @cur_Customer_No VARCHAR(20) = (SELECT [CUSTOMER NO] FROM #TestData_Ranked WHERE ROWNUMBER = @start)
    DECLARE @cur_ProductName VARCHAR(10) = (SELECT [PRODUCT NAME] FROM #TestData_Ranked WHERE ROWNUMBER = @start)

    DECLARE @IN_IfExists INT    = (SELECT COUNT(*) FROM #ibft_in ii WHERE ii.[CUSTOMER NO] = @cur_Customer_No)
    DECLARE @IN_Amount FLOAT    = ISNULL((SELECT ISNULL(ii.AMOUNT, 0) FROM #ibft_in ii WHERE ii.[CUSTOMER NO] = @cur_Customer_No), 0)

    DECLARE @Remaining_Amount FLOAT = ISNULL(CASE WHEN (@cur_Amount - @IN_Amount)<0 THEN 0 ELSE (@cur_Amount - @IN_Amount) END, 0)

    PRINT @start
    PRINT @cur_Amount
    PRINT @IN_Amount

    IF (@cur_ProductName = 'IN FLOW' AND @IN_IfExists = 0)
    BEGIN
        INSERT INTO #ibft_in
        VALUES (@cur_Customer_No, @cur_TrxnDateTime, @cur_Amount)
    END
    ELSE
    BEGIN
        IF (@cur_ProductName = 'IN FLOW' AND @IN_IfExists > 0)
        BEGIN
            UPDATE #ibft_in
            SET 
                [TRXN DATETIME] = @cur_TrxnDateTime,
                AMOUNT          = @IN_Amount + @cur_Amount
            WHERE 
                [CUSTOMER NO]           = @cur_Customer_No
        END
        ELSE
        BEGIN
            IF (@cur_ProductName = 'OUT FLOW')
            BEGIN
                INSERT INTO #ibft_out
                VALUES (@cur_FinancialId, @cur_Date, @cur_TrxnDateTime, @cur_Amount, @cur_Customer_No, @cur_ProductName, @Remaining_Amount)

                UPDATE #ibft_in
                SET 
                    AMOUNT = CASE WHEN (@IN_Amount - @cur_Amount)<0 THEN 0 ELSE (@IN_Amount - @cur_Amount) END
                WHERE
                    [CUSTOMER NO] = @cur_Customer_No
            END
        END
    END

SET @start = @start + 1
END
--------------------------------------------

SAMPLE DATA

在此处输入图像描述

REQUIRED OUTPUT

在此处输入图像描述

EDIT :

This is technically still a RBAR query, just rewritten. I don't have enough sample data to know what the performance is like, so obviously please run this against your 20k data set to compare speed, but this uses CTE instead of a CURSOR. Note that this starts right at your -- Ranked table with ROW NUMBERS --- line:

-- Ranked table with ROW NUMBERS --- 
DROP TABLE IF EXISTS #TestData_Ranked
SELECT *, 
    --ROWNUMBER = ROW_NUMBER() OVER (ORDER BY td.[FINANCIAL ID]), 
    TrxTypeSEQ = ROW_NUMBER() over (PARTITION BY [customer no] ORDER BY [trxn datetime])
INTO #TestData_Ranked
FROM #TestData td
GO

-- not unique but still an index
CREATE CLUSTERED INDEX [CI_ROWNUMBER] ON #TestData_Ranked (TrxTypeSEQ ASC)
GO


DROP TABLE IF EXISTS #ibft_out;

WiTH SortedTrx AS (
  SELECT 
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    TrxTypeSEQ,
    BUCKET = case when [Product Name] = 'IN FLOW' then Amount ELSE 0 END,
    NETOUT = case when [Product Name] = 'OUT FLOW' then Amount ELSE 0 END
  FROM #TestData_Ranked  
  WHERE TrxTypeSEQ = 1

  UNION ALL 

  SELECT 
    AllTrx.[Financial ID],
    AllTrx.[Date],
    AllTrx.[Trxn Datetime],
    AllTrx.[Amount],
    AllTrx.[Customer No],
    AllTrx.[Product Name],
    AllTrx.TrxTypeSEQ,
    BUCKET = case when AllTrx.[Product Name] = 'IN FLOW' then SortedTrx.Bucket + AllTrx.Amount 
                    ELSE 
                        case when AllTrx.[Amount] > SortedTrx.Bucket then 0 else SortedTrx.Bucket - AllTrx.[Amount] end
                    END,
    NETOUT = case when AllTrx.[Product Name] = 'IN FLOW' then 0 
                    else 
                        case when AllTrx.[Amount] < SortedTrx.Bucket then 0 else AllTrx.[Amount] - SortedTrx.Bucket end
                    END
  FROM #TestData_Ranked  AllTrx 
  INNER JOIN SortedTrx ON AllTrx.[customer no] = SortedTrx.[customer no] AND SortedTrx.TrxTypeSEQ + 1 = AllTrx.TrxTypeSEQ
)
SELECT 
    [Financial ID],
    [Date],
    [Trxn Datetime],
    [Amount],
    [Customer No],
    [Product Name],
    NETOUT
INTO #ibft_out
FROM SortedTrx 
WHERE [Product Name] = 'OUT FLOW'
ORDER BY [Trxn Datetime]

SELECT * FROM #ibft_out

ORIGINAL :

To rewrite your WHILE loop into a CURSOR will cut down on the number of times you're querying #TestData_Ranked from 6 to 1; instead of querying #ibft_in 3x every single time, you'll only query it once or twice, depending on the transaction type. I'm interested to know what kind of performance it gives you; the sample data ran in 7s with Execution Plan Analysis vs over 40s using your code.

Replacing everything after your -- Main Query to get OUTPUT DATA table --- line with:

-- Create our useful tables for IN & OUT calculations 
DROP TABLE IF EXISTS #ibft_in
CREATE TABLE #ibft_in ([CUSTOMER NO] VARCHAR(20), [TRXN DATETIME] DATETIME, AMOUNT FLOAT)

DROP TABLE IF EXISTS #ibft_out
SELECT TOP(0) iios.[FINANCIAL ID], iios.DATE, iios.[TRXN DATETIME], iios.AMOUNT, iios.[CUSTOMER NO], iios.[PRODUCT NAME], 0 AS [NET OUT FLOW]
INTO #ibft_out
FROM #TestData_Ranked iios

-- Declare these once, outside a loop, to cut down on wasted work
DECLARE @currRow INT = 0,
        @currFinancialID BIGINT = 0,
        @currDateTime DATETIME,
        @currAmt FLOAT,
        @currCustomer VARCHAR(20),
        @currTrxType VARCHAR(10),
        @fundsAvailable FLOAT;
        
-- We want a one-way cursor as fast as we can
DECLARE trx_cursor CURSOR LOCAL FAST_FORWARD FOR
   SELECT 
      ROWNUMBER,
      [Financial ID],
      [Trxn Datetime],
      [Amount],
      [Customer No],
      [Product Name]
    FROM #TestData_Ranked
    ORDER BY ROWNUMBER ASC

OPEN trx_cursor
FETCH NEXT FROM trx_cursor INTO @currRow, 
                                @currFinancialID,
                                @currDateTime,
                                @currAmt,
                                @currCustomer,
                                @currTrxType
WHILE @@FETCH_STATUS = 0
BEGIN
    -- If this is an IN transaction, INSERT or UPDATE our bucket of rupees
    IF @currTrxType = 'IN FLOW' 
        MERGE #ibft_in as Tgt
        USING (select @currCustomer as cno, @currDateTime as dt, @currAmt as amt) as Src
        ON tgt.[CUSTOMER NO] = cno
        WHEN NOT MATCHED BY TARGET THEN INSERT 
            VALUES (cno, dt, amt) 
        WHEN MATCHED THEN UPDATE
            SET Tgt.[TRXN DATETIME] = dt,
                Tgt.AMOUNT = tgt.amount + amt;

    -- OTHERWISE, calculate our NET OUT
    ELSE 
    BEGIN
        SELECT @fundsAvailable = ISNULL((SELECT ISNULL(ii.AMOUNT, 0) FROM #ibft_in ii WHERE ii.[CUSTOMER NO] = @currCustomer), 0)

        INSERT INTO #ibft_out 
        VALUES ( 
            @currFinancialID, 
            CAST(@currDateTime as DATE), 
            @currDateTime, 
            @currAmt, 
            @currCustomer, 
            @currTrxType, 
            ISNULL(CASE WHEN (@currAmt - @fundsAvailable)<0 THEN 0 ELSE (@currAmt - @fundsAvailable) END, 0)
        )

        UPDATE #ibft_in
        SET AMOUNT = CASE WHEN (@fundsAvailable - @currAmt)<0 THEN 0 ELSE (@fundsAvailable - @currAmt) END
        WHERE [CUSTOMER NO] = @currCustomer
    END
        

FETCH NEXT FROM trx_cursor INTO @currRow, 
                                @currFinancialID,
                                @currDateTime,
                                @currAmt,
                                @currCustomer,
                                @currTrxType
END
CLOSE trx_cursor
DEALLOCATE trx_cursor

select * from #ibft_in
select * from #ibft_out

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM