簡體   English   中英

SQL Server 迭代數百萬行的更好方法

[英]SQL Server better way to iterate through millions of rows

我正在處理 SAP Timesheet 數據,因此有數百萬行。 我想要做的是從 SAP 表中選擇數據並將其插入到 MS SQL Server 上的表中。

所以我想插入原始記錄,然后如果對原始記錄進行更新,這是一個帶有refcounter的新 SAP 記錄的形式,我想在我的表中找到原始記錄並更新它,保留原始記錄計數器值。

所以我用游標成功地做到了這一點(我知道不是最好的),但是有數百萬條記錄,我想知道是否有更快的方法,因為我在游標運行的第 4 天。 有沒有比我下面更好的方法:

-- Sample schema and data for the question.
--   CATSDB      : raw SAP timesheet rows; a populated REFCOUNTER points at the
--                 COUNTER of the record that the row supersedes.
--   TBL_COUNTER : every COUNTER -> REFCOUNTER link seen so far.
--   TEMP        : destination table, keyed by the original COUNTER.
CREATE TABLE CATSDB
(
    [COUNTER]   nvarchar(12),
    REFCOUNTER  nvarchar(12),
    PERNR       nvarchar(8),
    WORKDATE    nvarchar(8),
    CATSHOURS   decimal(7, 3),
    APDAT       nvarchar(8),
    LAETM       nvarchar(6),
    CATS_STATUS nvarchar(2),
    APPR_STATUS nvarchar(2)
);

-- CATSHOURS is written as numeric literals so no string-to-decimal conversion is needed.
INSERT INTO CATSDB
    ([COUNTER], REFCOUNTER, PERNR, WORKDATE, CATSHOURS, APDAT, LAETM, CATS_STATUS, APPR_STATUS)
VALUES
    ('000421692670', NULL,           '00000071', '20190114', 6.00, '20190204', '174541', '30', '30'),
    ('000421692671', NULL,           '00000071', '20190114', 3.00, '20190204', '174541', '30', '30'),
    ('000421692672', NULL,           '00000071', '20190115', 6.00, '00000000', '000000', '60', '20'),
    ('000421692673', NULL,           '00000071', '20190115', 3.00, '00000000', '000000', '60', '20'),
    ('000421692712', '000421692672', '00000071', '20190115', 0.00, '20190115', '111007', '30', '30'),
    ('000421692713', '000421692673', '00000071', '20190115', 0.00, '20190115', '111007', '30', '30'),
    ('000429718015', NULL,           '00000072', '20190313', 7.00, '00000000', '000000', '60', '20'),
    ('000429718016', NULL,           '00000072', '20190313', 1.50, '20190315', '164659', '30', '30'),
    ('000429718017', NULL,           '00000072', '20190313', 1.00, '20190315', '164659', '30', '30'),
    ('000430154143', NULL,           '00000072', '20190313', 2.00, '00000000', '000000', '60', '20'),
    ('000430154142', '000429718015', '00000072', '20190313', 5.00, '00000000', '000000', '60', '20'),
    ('000430154928', '000430154142', '00000072', '20190313', 4.50, '20190315', '164659', '30', '30'),
    ('000430154929', '000430154143', '00000072', '20190313', 2.50, '20190315', '164659', '30', '30'),
    ('000429774620', NULL,           '00000152', '20190314', 1.00, '00000000', '000000', '60', '20'),
    ('000429774619', NULL,           '00000152', '20190314', 1.00, '00000000', '000000', '60', '20'),
    ('000429802106', '000429774620', '00000152', '20190314', 2.00, '00000000', '000000', '60', '20'),
    ('000429802105', '000429774619', '00000152', '20190314', 3.00, '00000000', '000000', '60', '20'),
    ('000429840242', '000429802106', '00000152', '20190314', 4.00, '20190315', '143857', '30', '30'),
    ('000429840241', '000429802105', '00000152', '20190314', 5.00, '20190315', '143857', '30', '30');

-- nvarchar (not varchar) so comparisons against the nvarchar variables and
-- columns used elsewhere need no implicit conversion. The loading loop only
-- inserts a COUNTER after a NOT EXISTS check, so COUNTER is unique here and
-- the primary key turns each per-row lookup into an index seek.
CREATE TABLE [TBL_COUNTER]
(
    [COUNTER]    nvarchar(12) NOT NULL
        CONSTRAINT PK_TBL_COUNTER PRIMARY KEY CLUSTERED,
    [REFCOUNTER] nvarchar(12) NULL
);

-- Same reasoning: TEMP is probed and updated by COUNTER once per source row.
CREATE TABLE TEMP
(
    [COUNTER]     nvarchar(12) NOT NULL
        CONSTRAINT PK_TEMP PRIMARY KEY CLUSTERED,
    [REFCOUNTER]  nvarchar(12) NULL,
    [PERNR]       nvarchar(8) NULL,
    [WORKDATE]    nvarchar(8) NULL,
    [CATSHOURS]   decimal(7, 3) NULL,
    [APDAT]       nvarchar(8) NULL,
    [LAETM]       nvarchar(6) NULL,
    [CATS_STATUS] nvarchar(2) NULL,
    [APPR_STATUS] nvarchar(2) NULL
);

-- One variable per CATSDB column; each FETCH fills them with the current row.
DECLARE @COUNTER     nvarchar(12);
DECLARE @REFCOUNTER  nvarchar(12);
DECLARE @PERNR       nvarchar(8);
DECLARE @WORKDATE    nvarchar(8);
DECLARE @CATSHOURS   decimal(7, 3);
DECLARE @APDAT       nvarchar(8);
DECLARE @LAETM       nvarchar(6);
DECLARE @CATS_STATUS nvarchar(2);
DECLARE @APPR_STATUS nvarchar(2);

-- COUNTER of the first record in the change chain of the current row.
DECLARE @orig_counter nvarchar(12);

-- Cursor over every CATSDB row.
-- FAST_FORWARD: the loop only calls FETCH NEXT and never updates through the
-- cursor, so a forward-only, read-only cursor is sufficient.
-- ORDER BY: the loop must see an original record before any record that
-- supersedes it; without ORDER BY the row order is not guaranteed.
-- NOTE(review): assumes SAP assigns COUNTER values in increasing order, so a
-- superseding record always has a higher COUNTER than its original - confirm.
DECLARE curs CURSOR FAST_FORWARD FOR
    SELECT
        [COUNTER],
        REFCOUNTER,
        PERNR,
        WORKDATE,
        CATSHOURS,
        APDAT,
        LAETM,
        CATS_STATUS,
        APPR_STATUS
    FROM CATSDB
    ORDER BY [COUNTER];

-- Open the cursor and read the first row so that @@FETCH_STATUS is set
-- before the WHILE loop tests it.
OPEN curs;

FETCH NEXT FROM curs
INTO @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
     @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;

-- Row-by-row merge of CATSDB into TEMP.
--   * Every COUNTER -> REFCOUNTER link is recorded in TBL_COUNTER.
--   * A row without REFCOUNTER is an original record and is inserted into TEMP.
--   * A row with REFCOUNTER supersedes an earlier record: the chain is followed
--     back to the original COUNTER and that TEMP row is overwritten, keeping
--     its COUNTER.
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Record the link so later loads can resolve chains without the full data set.
    IF NOT EXISTS (SELECT * FROM TBL_COUNTER WHERE [COUNTER] = @COUNTER)
    BEGIN
        INSERT INTO TBL_COUNTER ([COUNTER], REFCOUNTER)
        VALUES (@COUNTER, @REFCOUNTER);
    END

    IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)
    BEGIN
        IF @REFCOUNTER <> '' AND @REFCOUNTER IS NOT NULL
        BEGIN
            -- Reset first: if the chain cannot be resolved, the value left over
            -- from the previous row must not be reused (it would update an
            -- unrelated TEMP row).
            SET @orig_counter = NULL;

            -- Start at the referenced record and follow REFCOUNTER back,
            -- parent by parent, until a record with no REFCOUNTER is reached.
            WITH n ([COUNTER], REFCOUNTER) AS
            (
                SELECT
                    cnt.[COUNTER],
                    cnt.REFCOUNTER
                FROM TBL_COUNTER AS cnt
                WHERE cnt.[COUNTER] = @REFCOUNTER

                UNION ALL

                SELECT
                    parent.[COUNTER],
                    parent.REFCOUNTER
                FROM TBL_COUNTER AS parent
                INNER JOIN n
                    ON parent.[COUNTER] = n.REFCOUNTER
            )
            SELECT @orig_counter = [COUNTER]
            FROM n
            WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

            -- When the chain could not be resolved, @orig_counter is NULL and
            -- this UPDATE touches no rows.
            UPDATE TEMP
            SET
                [REFCOUNTER]  = @REFCOUNTER,
                [PERNR]       = @PERNR,
                [WORKDATE]    = @WORKDATE,
                [CATSHOURS]   = @CATSHOURS,
                [APDAT]       = @APDAT,
                [LAETM]       = @LAETM,
                [CATS_STATUS] = @CATS_STATUS,
                [APPR_STATUS] = @APPR_STATUS
            WHERE [COUNTER] = @orig_counter;
        END
        ELSE
        BEGIN
            INSERT INTO TEMP
                ([COUNTER], [REFCOUNTER], [PERNR], [WORKDATE], [CATSHOURS],
                 [APDAT], [LAETM], [CATS_STATUS], [APPR_STATUS])
            VALUES
                (@COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
                 @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS);
        END
    END

    FETCH NEXT FROM curs
    INTO @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
         @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;
END

-- Finished with the cursor: close it, then release its resources.
CLOSE curs;
DEALLOCATE curs;

我縮短了它並為你們所有人創建了表格,以便能夠看到發生了什么。 預期的結果是

+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
|   COUNTER    |  REFCOUNTER  |  PERNR   | WORKDATE | CATSHOURS |  APDAT   | LAETM  | CATS_STATUS | APPR_STATUS |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| 000421692670 | NULL         | 00000071 | 20190114 |      6.00 | 20190204 | 174541 |          30 |          30 |
| 000421692671 | NULL         | 00000071 | 20190114 |      3.00 | 20190204 | 174541 |          30 |          30 |
| 000421692672 | 000421692672 | 00000071 | 20190115 |      0.00 | 20190115 | 111007 |          30 |          30 |
| 000421692673 | 000421692673 | 00000071 | 20190115 |      0.00 | 20190115 | 111007 |          30 |          30 |
| 000429718015 | 000430154142 | 00000072 | 20190313 |      4.50 | 20190315 | 164659 |          30 |          30 |
| 000429718016 | NULL         | 00000072 | 20190313 |      1.50 | 20190315 | 164659 |          30 |          30 |
| 000429718017 | NULL         | 00000072 | 20190313 |       1.0 | 20190315 | 164659 |          30 |          30 |
| 000430154143 | 000430154143 | 00000072 | 20190313 |      2.50 | 20190315 | 164659 |          30 |          30 |
| 000429774620 | 000429774620 | 00000152 | 20190314 |      2.00 | 00000000 | 000000 |          60 |          20 |
| 000429774619 | 000429802105 | 00000152 | 20190314 |      5.00 | 20190315 | 143857 |          30 |          30 |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+

我需要補充一下。 所以這有兩個階段。 第一階段是我將拉取 2019 年的所有數據以初始加載我的表。 然后每周,我將從原始源中提取數據以獲取新記錄和上次運行時更改的記錄。 所以我不會每周都有完整的鏈條。 需要有一種方法可以在沒有完整數據集的情況下返回原始計數器值,這就是為什么我有計數器表的原因。 我很抱歉沒有更清楚。 我忙於工作,無法像我計划的那樣專注於此。 我正在嘗試所有這些不同的技術。

我相信,以下查詢將幫助您開始,這是接近您目標的非常有效的方式。

它的創建是為了在中央位置維護 SQL Server 的歷史信息,並執行以下活動,您必須在相應的腳本塊中包含/替換您的表結構

  1. 創建temp
  2. 使用OPENQUERY通過Linked Servers (來源)從多個服務器收集信息並加載到Temp表中。
  3. Temp表上創建索引
  4. 使用 3 個場景將數據加載到中央表(目標)中(如腳本中所述)

注意:根據您的情況替換腳本


-- Staging table holding the source rows for this load.
CREATE TABLE #SrcTemp
(
    AENAM        nvarchar(12),
    AUTYP        nvarchar(2),
    AWART        nvarchar(4),
    BELNR        nvarchar(10),
    CATSHOURS    decimal(7, 3),
    CATSQUANTITY decimal(18, 3),
    CHARGE_HOLD  nvarchar(24),
    [COUNTER]    nvarchar(12),
    ERNAM        nvarchar(12),
    ERSDA        nvarchar(8),
    ERSTM        nvarchar(6),
    HRCOSTASG    nvarchar(1),
    LAEDA        nvarchar(8),
    LSTAR        nvarchar(6),
    LTXA1        nvarchar(40),
    MANDT        nvarchar(3),
    PERNR        nvarchar(8),
    RAPLZL       nvarchar(8),
    RAUFPL       nvarchar(10),
    REFCOUNTER   nvarchar(12),
    RNPLNR       nvarchar(12),
    SKOSTL       nvarchar(10),
    CATS_STATUS  nvarchar(2),
    SUPP3        nvarchar(10),
    WORKDATE     nvarchar(8),
    ZZOH_ORDER   nvarchar(24),
    APDAT        nvarchar(8),
    APNAM        nvarchar(12),
    LAETM        nvarchar(6),
    APPR_STATUS  nvarchar(2)
);

-- Load the source rows. A missing REFCOUNTER (NULL or '') is stored as '0'
-- during the load itself; the later processing blocks filter on
-- REFCOUNTER = '0'. Normalising with a separate UPDATE before the INSERT
-- would run against an empty table and change nothing.
INSERT INTO #SrcTemp
    (AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA,
     ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT, PERNR, RAPLZL, RAUFPL, REFCOUNTER,
     RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS)
SELECT
    AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA,
    ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT, PERNR, RAPLZL, RAUFPL,
    COALESCE(NULLIF(REFCOUNTER, ''), '0') AS REFCOUNTER,
    RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS
FROM CATSDB;

-- Index after loading, on the columns the processing blocks join and filter on.
CREATE CLUSTERED INDEX CLU_SrvTemp ON #SrcTemp ([COUNTER], REFCOUNTER);

--BEGIN
--    OPEN curs
--END

-- Scope: UNCHANGED Records (REFCOUNTER = '0') ===============================================================================================================
-- Upserts the staging rows that reference no earlier record into TEMP.
-- Nothing is written if the staging table holds the same COUNTER twice.

    IF EXISTS
        (
            SELECT [COUNTER]
            FROM #SrcTemp
            WHERE REFCOUNTER = '0'
            GROUP BY [COUNTER]
            HAVING COUNT(*) > 1
        )
        BEGIN
            RAISERROR ('Primary key violation occurred in "UNCHANGED" records processing block', 16, 1) with NOWAIT;
        END
    ELSE
    BEGIN
        -- Records that already exist in TEMP: overwrite the destination row (D)
        -- with the staging values (S). Runs before the INSERT so freshly
        -- inserted rows are not immediately rewritten. COUNTER and REFCOUNTER
        -- are the match keys and therefore are not reassigned.
        UPDATE D
        SET [AENAM]        = S.AENAM
           ,[AUTYP]        = S.AUTYP
           ,[AWART]        = S.AWART
           ,[BELNR]        = S.BELNR
           ,[CATSHOURS]    = S.CATSHOURS
           ,[CATSQUANTITY] = S.CATSQUANTITY
           ,[CHARGE_HOLD]  = S.CHARGE_HOLD
           ,[ERNAM]        = S.ERNAM
           ,[ERSDA]        = S.ERSDA
           ,[ERSTM]        = S.ERSTM
           ,[HRCOSTASG]    = S.HRCOSTASG
           ,[LAEDA]        = S.LAEDA
           ,[LSTAR]        = S.LSTAR
           ,[LTXA1]        = S.LTXA1
           ,[MANDT]        = S.MANDT
           ,[PERNR]        = S.PERNR
           ,[RAPLZL]       = S.RAPLZL
           ,[RAUFPL]       = S.RAUFPL
           ,[RNPLNR]       = S.RNPLNR
           ,[SKOSTL]       = S.SKOSTL
           ,[CATS_STATUS]  = S.CATS_STATUS
           ,[SUPP3]        = S.SUPP3
           ,[WORKDATE]     = S.WORKDATE
           ,[ZZOH_ORDER]   = S.ZZOH_ORDER
           ,[APDAT]        = S.APDAT
           ,[APNAM]        = S.APNAM
           ,[LAETM]        = S.LAETM
           ,[APPR_STATUS]  = S.APPR_STATUS
        FROM TEMP AS D
        INNER JOIN #SrcTemp AS S
            ON S.[COUNTER] = D.[COUNTER]
           AND S.REFCOUNTER = D.REFCOUNTER
        WHERE S.REFCOUNTER = '0';

        -- Records not yet in TEMP: insert them.
        INSERT INTO TEMP  ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
                            ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
                            ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
                            ,[LAETM],[APPR_STATUS]
                            )
        SELECT    S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
                , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
                , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
                , S.[LAETM], S.[APPR_STATUS]
        FROM    #SrcTemp AS S
        WHERE   S.REFCOUNTER = '0'
            AND NOT EXISTS (SELECT * FROM TEMP AS D WHERE D.[COUNTER] = S.[COUNTER]);
    END

-- Scope: CHANGED Records (REFCOUNTER <> '0') ================================================================================================================
-- Upserts the staging rows that reference an earlier record into TEMP,
-- matching on the (COUNTER, REFCOUNTER) pair, then drops the staging table.
-- Nothing is written if the staging table holds the same pair twice.

    IF EXISTS
        (
            SELECT [COUNTER]
            FROM #SrcTemp
            WHERE REFCOUNTER <> '0'
            GROUP BY [COUNTER], REFCOUNTER
            HAVING COUNT(*) > 1
        )
        BEGIN
            -- Severity 16, matching the UNCHANGED block: severity 10 is only
            -- an informational message, not an error.
            RAISERROR ('Primary key violation occurred in "CHANGED" records processing block', 16, 1) with NOWAIT;
        END
    ELSE
    BEGIN
        -- CHANGED records that already exist in TEMP: overwrite the destination
        -- row (D) with the staging values (S). COUNTER and REFCOUNTER are the
        -- match keys and therefore are not reassigned.
        UPDATE D
        SET [AENAM]        = S.AENAM
           ,[AUTYP]        = S.AUTYP
           ,[AWART]        = S.AWART
           ,[BELNR]        = S.BELNR
           ,[CATSHOURS]    = S.CATSHOURS
           ,[CATSQUANTITY] = S.CATSQUANTITY
           ,[CHARGE_HOLD]  = S.CHARGE_HOLD
           ,[ERNAM]        = S.ERNAM
           ,[ERSDA]        = S.ERSDA
           ,[ERSTM]        = S.ERSTM
           ,[HRCOSTASG]    = S.HRCOSTASG
           ,[LAEDA]        = S.LAEDA
           ,[LSTAR]        = S.LSTAR
           ,[LTXA1]        = S.LTXA1
           ,[MANDT]        = S.MANDT
           ,[PERNR]        = S.PERNR
           ,[RAPLZL]       = S.RAPLZL
           ,[RAUFPL]       = S.RAUFPL
           ,[RNPLNR]       = S.RNPLNR
           ,[SKOSTL]       = S.SKOSTL
           ,[CATS_STATUS]  = S.CATS_STATUS
           ,[SUPP3]        = S.SUPP3
           ,[WORKDATE]     = S.WORKDATE
           ,[ZZOH_ORDER]   = S.ZZOH_ORDER
           ,[APDAT]        = S.APDAT
           ,[APNAM]        = S.APNAM
           ,[LAETM]        = S.LAETM
           ,[APPR_STATUS]  = S.APPR_STATUS
        FROM TEMP AS D
        INNER JOIN #SrcTemp AS S
            ON S.[COUNTER] = D.[COUNTER]
           AND S.REFCOUNTER = D.REFCOUNTER
        WHERE S.REFCOUNTER <> '0';

        -- CHANGED records not yet in TEMP: insert them.
        INSERT INTO TEMP  ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
                            ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
                            ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
                            ,[LAETM],[APPR_STATUS]
                            )
        SELECT    S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
                , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
                , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
                , S.[LAETM], S.[APPR_STATUS]
        FROM    #SrcTemp AS S
        WHERE   S.REFCOUNTER <> '0'
            AND NOT EXISTS
                (
                    SELECT *
                    FROM TEMP AS D
                    WHERE D.[COUNTER] = S.[COUNTER]
                      AND D.REFCOUNTER = S.REFCOUNTER
                );
    END

DROP TABLE #SrcTemp;

看起來它可以通過一個簡單的遞歸查詢來完成。 擁有合適的索引也很重要。

樣本數據

這就是您的示例數據在問題中的樣子。 只有幾個相關的列。 最好包括幾組/一系列更改,而不僅僅是一組。 只有這些示例數據會使您更難驗證所提供的解決方案是否正確。

+-----------+---------------------+-----------+------------+
|   BELNR   |     CHARGE_HOLD     |  COUNTER  | REFCOUNTER |
+-----------+---------------------+-----------+------------+
| 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL       |
| 417549506 | T4-GS023-ABC2       | 420203329 | 420202428  |
| 417553156 | JGS023001    0010#* | 420206979 | 420203329  |
| 417557221 | T4-GS023-ABC2       | 420211044 | 420206979  |
| 417581675 | JGS023001    0010#* | 420235498 | 420211044  |
| 417677969 | JGS023001    0010#* | 420331792 | 420235498  |
+-----------+---------------------+-----------+------------+

查詢的主要遞歸部分

-- Expands every change chain: one output row per (chain start, chain member).
-- The Original* columns repeat the starting record (REFCOUNTER IS NULL); the
-- New* columns hold the record reached at depth Lvl.
WITH chain
(
    Lvl,
    OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
    NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER
)
AS
(
    -- Anchor: chain starts, paired with themselves at level 1.
    SELECT
        1,
        root.BELNR, root.CHARGE_HOLD, root.[COUNTER], root.REFCOUNTER,
        root.BELNR, root.CHARGE_HOLD, root.[COUNTER], root.REFCOUNTER
    FROM CATSDB AS root
    WHERE root.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the record whose REFCOUNTER points at the record reached so far.
    SELECT
        prev.Lvl + 1,
        prev.OriginalBELNR, prev.OriginalCHARGE_HOLD, prev.OriginalCOUNTER, prev.OrginalREFCOUNTER,
        nxt.BELNR, nxt.CHARGE_HOLD, nxt.[COUNTER], nxt.REFCOUNTER
    FROM CATSDB AS nxt
    INNER JOIN chain AS prev
        ON nxt.REFCOUNTER = prev.NewCOUNTER
)
SELECT * FROM chain;

中間結果

+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR  |   NewCHARGE_HOLD    | NewCOUNTER | NewREFCOUNTER |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
|   1 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417548605 | T4-GS023ABC2 0150#* |  420202428 | NULL          |
|   2 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417549506 | T4-GS023-ABC2       |  420203329 | 420202428     |
|   3 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417553156 | JGS023001    0010#* |  420206979 | 420203329     |
|   4 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417557221 | T4-GS023-ABC2       |  420211044 | 420206979     |
|   5 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417581675 | JGS023001    0010#* |  420235498 | 420211044     |
|   6 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417677969 | JGS023001    0010#* |  420331792 | 420235498     |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+

您可以看到,我們先取得了每條鏈的起始行(其中 RefCounter IS NULL),然後把起始行的值沿著整個更改鏈一路帶到了每一行。

現在我們只需要選擇具有最后更改的行,即每個起始行中 Lvl 最大的那一行。一種方法是使用具有適當分區的 ROW_NUMBER 函數。

最終查詢

-- Returns, for every chain start, the last record of its change chain
-- (the chain member with the highest Lvl).
WITH chain
(
    Lvl,
    OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
    NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER
)
AS
(
    -- Anchor: chain starts, paired with themselves at level 1.
    SELECT
        1,
        root.BELNR, root.CHARGE_HOLD, root.[COUNTER], root.REFCOUNTER,
        root.BELNR, root.CHARGE_HOLD, root.[COUNTER], root.REFCOUNTER
    FROM CATSDB AS root
    WHERE root.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the record whose REFCOUNTER points at the record reached so far.
    SELECT
        prev.Lvl + 1,
        prev.OriginalBELNR, prev.OriginalCHARGE_HOLD, prev.OriginalCOUNTER, prev.OrginalREFCOUNTER,
        nxt.BELNR, nxt.CHARGE_HOLD, nxt.[COUNTER], nxt.REFCOUNTER
    FROM CATSDB AS nxt
    INNER JOIN chain AS prev
        ON nxt.REFCOUNTER = prev.NewCOUNTER
),
-- Numbers the members of each chain from the deepest level downwards.
ranked AS
(
    SELECT
        chain.*,
        ROW_NUMBER() OVER (PARTITION BY chain.OriginalCOUNTER ORDER BY chain.Lvl DESC) AS rn
    FROM chain
)
SELECT *
FROM ranked
WHERE rn = 1
-- Append OPTION (MAXRECURSION 0) here if a chain can be longer than 100 records.
;

如果您可以擁有超過 100 的鏈,則應向查詢添加OPTION (MAXRECURSION 0) ,因為默認情況下 SQL Server 將遞歸深度限制為 100。

結果

+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR  |   NewCHARGE_HOLD    | NewCOUNTER | NewREFCOUNTER | rn |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
|   6 |     417548605 | T4-GS023ABC2 0150#* |       420202428 | NULL              | 417677969 | JGS023001    0010#* |  420331792 |     420235498 |  1 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+

效率

為了讓它有效地工作,我們需要在REFCOUNTER列上有一個索引。 此外,查詢假定REFCOUNTER為 NULL,而不是''。 如果您混合使用 NULL 和空字符串,請統一您的數據,否則索引將無用。 這個索引是你需要的最低限度。

理想情況下,您應該在REFCOUNTER列上有一個 CLUSTERED 索引,因為查詢總是從表中選擇所有列。

-- Clustered index that orders CATSDB by REFCOUNTER (ascending is the default).
CREATE CLUSTERED INDEX [IX_RefCounter]
    ON [dbo].[CATSDB] ([REFCOUNTER]);

如果您無法更改原始表的索引,我建議將所有數百萬行復制到臨時表中,並為該臨時表創建此聚集索引。

我對這個聚集索引有一個很好的計划。

計划

您可以采取一些措施來提高性能:

將 COUNTER 和 REFCOUNTER 從 nvarchar 轉換為數據類型 int,對 int 的操作比字符快得多。 不要使用游標,您仍然可以使用 while 循環一次處理一條記錄。

-- Cursor-free skeleton: visit the CATSDB rows one at a time in COUNTER order.
-- NOTE(review): assumes COUNTER has been converted to int, as suggested above,
-- and that all COUNTER values are greater than 0 - confirm.
DECLARE @COUNTER int = 0;

WHILE (1 = 1)
BEGIN
    -- Next COUNTER after the one just processed; MIN() over no rows yields NULL.
    SELECT @COUNTER = MIN([COUNTER])
    FROM CATSDB
    WHERE [COUNTER] > @COUNTER;

    -- A scalar aggregate always returns exactly one row, so @@ROWCOUNT cannot
    -- signal the end of the data; test for NULL instead.
    IF @COUNTER IS NULL
        BREAK;

    /* SELECT RECORD FOR THIS @COUNTER FROM CATSDB */
    /* DO THE PROCESSING FOR THIS RECORD */
END

有一種稱為 sql Bulk copy 的方法,我不知道它會幫助解決您的問題,但請嘗試一下。

執行此操作的最高效方法是通過 BCP。 https://docs.microsoft.com/en-us/sql/tools/bcp-utility?view=sql-server-2017

您可以將所有數據 BCP 到 SQL Server 中的臨時表中,然后運行插入和更新。 此外,在檢查不存在的記錄以確定這是插入還是更新時,“IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)”非常昂貴。

執行此操作的更高效方法的示例(表名稱:TBL_SOURCE、TBL_DESTINATION、#TBL_UPDATES、#TBL_INSERTS):

-- Source rows whose COUNTER is not yet in the destination: to be inserted.
-- Only the source columns are selected: SELECT * over the join would return
-- every shared column name (such as COUNTER) twice, which SELECT ... INTO
-- rejects.
SELECT S.*
INTO #TBL_INSERTS
FROM TBL_SOURCE AS S
WHERE NOT EXISTS
    (SELECT * FROM TBL_DESTINATION AS D WHERE D.[COUNTER] = S.[COUNTER]);

-- Source rows whose COUNTER already exists in the destination: to be updated.
SELECT S.*
INTO #TBL_UPDATES
FROM TBL_SOURCE AS S
WHERE EXISTS
    (SELECT * FROM TBL_DESTINATION AS D WHERE D.[COUNTER] = S.[COUNTER]);

需要更新的記錄會保存在 #TBL_UPDATES 中,需要插入的記錄會保存在 #TBL_INSERTS 中。

查看基於少量樣本數據和給定輸出,我們的腳本不能 100% OK 和優化,其中需要更新的數百萬數據是關注點。

我對我的腳本充滿信心,在完全理解需求后,它可以朝那個方向改進。

首先我想知道為什么數據類型是nvarchar ,如果可能的話讓它變成varchar,int,datetime

如果您可以更改數據類型,那么它會對性能產生影響。

此外,表中也沒有可以作為 Clustered Index 的標識(identity)列。

從性能的角度來看,這兩點很重要。

所以在我的例子中,

-- CATSDB with a surrogate identity column as the clustered primary key.
CREATE TABLE CATSDB
(
    id          int IDENTITY NOT NULL
        CONSTRAINT PK_CATSDB_ID PRIMARY KEY CLUSTERED,
    [COUNTER]   nvarchar(12),
    REFCOUNTER  nvarchar(12),
    PERNR       nvarchar(8),
    WORKDATE    nvarchar(8),
    CATSHOURS   decimal(7, 3),
    APDAT       nvarchar(8),
    LAETM       nvarchar(6),
    CATS_STATUS nvarchar(2),
    APPR_STATUS nvarchar(2)
);

-- Nonclustered index on the two columns used to link a record to its predecessor.
CREATE NONCLUSTERED INDEX FICATSDB_REFCOUNTER
    ON CATSDB (REFCOUNTER, [COUNTER]);




-- Recreate the work table that maps each CATSDB row (UpdateID) to the row
-- holding the values it should receive (FINDID), together with those values.
IF OBJECT_ID('tempdb..#TEMP', 'U') IS NOT NULL
    DROP TABLE #TEMP;

-- The key is UpdateID, not FINDID: the loading query produces one row per
-- CATSDB id, and several ids of the same change chain share one FINDID, so a
-- primary key on FINDID would be violated for longer chains.
CREATE TABLE #TEMP
(
    UpdateID      INT NOT NULL PRIMARY KEY,
    FINDID        INT NOT NULL,
    [COUNTER]     [NVARCHAR](12) NOT NULL,
    [REFCOUNTER]  [NVARCHAR](12) NULL,
    [PERNR]       [NVARCHAR](8) NULL,
    [WORKDATE]    [NVARCHAR](8) NULL,
    [CATSHOURS]   [DECIMAL](7, 3) NULL,
    [APDAT]       [NVARCHAR](8) NULL,
    [LAETM]       [NVARCHAR](6) NULL,
    [CATS_STATUS] [NVARCHAR](2) NULL,
    [APPR_STATUS] [NVARCHAR](2) NULL
);

-- For every CATSDB row, find the row with the highest id reachable by
-- following REFCOUNTER links forward, and store that row's values in #TEMP.
WITH chain (id, [COUNTER], REFCOUNTER, Findid) AS
(
    -- Anchor: every row reaches itself.
    SELECT
        src.id,
        src.[COUNTER],
        src.REFCOUNTER,
        src.id
    FROM dbo.CATSDB AS src

    UNION ALL

    -- Step: a row that references the record reached so far; the starting id
    -- is carried along unchanged.
    SELECT
        reached.id,
        nxt.[COUNTER],
        nxt.REFCOUNTER,
        nxt.id
    FROM dbo.CATSDB AS nxt
    INNER JOIN chain AS reached
        ON nxt.REFCOUNTER = reached.[COUNTER]
    WHERE nxt.id >= reached.Findid
),
-- One row per starting id with the highest id reached from it.
latest (id, Findid) AS
(
    SELECT
        chain.id,
        MAX(chain.Findid)
    FROM chain
    GROUP BY chain.id
)
INSERT INTO #TEMP
    (UpdateID, FINDID, [COUNTER], [REFCOUNTER], [PERNR], [WORKDATE],
     [CATSHOURS], [APDAT], [LAETM], [CATS_STATUS], [APPR_STATUS])
SELECT
    latest.id,
    latest.Findid,
    final.[COUNTER],
    final.REFCOUNTER,
    final.PERNR,
    final.WORKDATE,
    final.CATSHOURS,
    final.APDAT,
    final.LAETM,
    final.CATS_STATUS,
    final.APPR_STATUS
FROM dbo.CATSDB AS final
INNER JOIN latest
    ON final.id = latest.Findid;

-- Trial run: apply the values collected in #TEMP to CATSDB inside a
-- transaction, show the result, then roll everything back.
BEGIN TRY
    BEGIN TRAN;

    -- Overwrite each CATSDB row with the values stored for it in #TEMP.
    UPDATE A
      SET
          [REFCOUNTER] = b.REFCOUNTER,
          [PERNR] = b.PERNR,
          [WORKDATE] = b.WORKDATE,
          [CATSHOURS] = b.CATSHOURS,
          [APDAT] = b.APDAT,
          [LAETM] = b.LAETM,
          [CATS_STATUS] = b.CATS_STATUS,
          [APPR_STATUS] = b.APPR_STATUS
    FROM CATSDB A
         INNER JOIN #TEMP B ON a.id = b.UpdateID;

    -- this is only test query
    SELECT c1.UpdateID AS UpdateID,
           a.*
    FROM dbo.CATSDB A
         INNER JOIN #TEMP c1 ON a.id = c1.Findid;

    -- Test mode: undo the update. Replace ROLLBACK with COMMIT to keep it.
    IF(@@trancount > 0)
        ROLLBACK; -- commit
END TRY
BEGIN CATCH
    IF(@@trancount > 0)
        ROLLBACK;

    -- Re-raise the original error so a failed run is not silently ignored.
    THROW;
END CATCH;

#Temp should be permanent table.

IMO,您的表非常需要身份列,該列應該是身份和聚集索引。

你可以嘗試,你可以改變它。

REFCOUNTER,COUNTER應該是非聚集索引。

只有在優化查詢並在索引上方使用適當的 PLAN 之后,才能提高性能。

適當的計划:您應該使用Recursive or RBAR並一次更新數百萬條記錄,還是應該Batch update

您可以首先使用Rollback測試包含數百萬行的腳本。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM