[英]SQL Server better way to iterate through millions of rows
我正在處理 SAP Timesheet 數據,因此有數百萬行。 我想要做的是從 SAP 表中選擇數據並將其插入到 MS SQL Server 上的表中。
我想先插入原始記錄;如果之後原始記錄被更新(在 SAP 中,更新會以一條帶有 REFCOUNTER 的新記錄出現),
我想在我的表中找到原始記錄並更新它,同時保留原始記錄的 COUNTER 值。
所以我用游標成功地做到了這一點(我知道不是最好的),但是有數百萬條記錄,我想知道是否有更快的方法,因為我在游標運行的第 4 天。 有沒有比我下面更好的方法:
-- Test fixture: creates the source table CATSDB with sample rows, plus the two
-- (initially empty) working tables TBL_COUNTER and TEMP used by the load script.
BEGIN
-- CATSDB: sample source rows. All columns are nullable here.
CREATE TABLE CATSDB
(
[COUNTER] nvarchar(12),
REFCOUNTER nvarchar(12),
PERNR nvarchar(8),
WORKDATE nvarchar(8),
CATSHOURS decimal(7, 3),
APDAT nvarchar(8),
LAETM nvarchar(6),
CATS_STATUS nvarchar(2),
APPR_STATUS nvarchar(2)
)
-- Sample data. REFCOUNTER is NULL on rows that reference nothing; otherwise it
-- holds the COUNTER of another row in this set (e.g. ...712 references ...672).
-- Some chains are two changes deep (e.g. ...015 <- ...142 <- ...928).
-- CATSHOURS is supplied as string literals and implicitly converted to decimal(7, 3).
-- NOTE(review): WORKDATE/APDAT look like yyyymmdd and LAETM like hhmmss strings - confirm.
INSERT INTO CATSDB
(
[COUNTER],REFCOUNTER,PERNR,WORKDATE,CATSHOURS,APDAT,LAETM,CATS_STATUS,APPR_STATUS
)
VALUES
('000421692670',NULL,'00000071','20190114','6.00','20190204','174541','30','30'),
('000421692671',NULL,'00000071','20190114','3.00','20190204','174541','30','30'),
('000421692672',NULL,'00000071','20190115','6.00','00000000','000000','60','20'),
('000421692673',NULL,'00000071','20190115','3.00','00000000','000000','60','20'),
('000421692712','000421692672','00000071','20190115','0.00','20190115','111007','30','30'),
('000421692713','000421692673','00000071','20190115','0.00','20190115','111007','30','30'),
('000429718015',NULL,'00000072','20190313','7.00','00000000','000000','60','20'),
('000429718016',NULL,'00000072','20190313','1.50','20190315','164659','30','30'),
('000429718017',NULL,'00000072','20190313','1.00','20190315','164659','30','30'),
('000430154143',NULL,'00000072','20190313','2.00','00000000','000000','60','20'),
('000430154142','000429718015','00000072','20190313','5.00','00000000','000000','60','20'),
('000430154928','000430154142','00000072','20190313','4.50','20190315','164659','30','30'),
('000430154929','000430154143','00000072','20190313','2.50','20190315','164659','30','30'),
('000429774620',NULL,'00000152','20190314','1.00','00000000','000000','60','20'),
('000429774619',NULL,'00000152','20190314','1.00','00000000','000000','60','20'),
('000429802106','000429774620','00000152','20190314','2.00','00000000','000000','60','20'),
('000429802105','000429774619','00000152','20190314','3.00','00000000','000000','60','20'),
('000429840242','000429802106','00000152','20190314','4.00','20190315','143857','30','30'),
('000429840241','000429802105','00000152','20190314','5.00','20190315','143857','30','30')
-- TBL_COUNTER: COUNTER -> REFCOUNTER pairs only.
-- NOTE(review): these columns are varchar while CATSDB/TEMP use nvarchar, so
-- comparisons against nvarchar values involve an implicit conversion.
CREATE TABLE [TBL_COUNTER]
(
[COUNTER] [varchar](12) NOT NULL,
[REFCOUNTER] [varchar](12) NULL
)
-- TEMP: destination table; same columns as CATSDB, but COUNTER is NOT NULL.
-- No key or index is declared on it.
CREATE TABLE TEMP
(
[COUNTER] [nvarchar](12) NOT NULL,
[REFCOUNTER] [nvarchar](12) NULL,
[PERNR] [nvarchar](8) NULL,
[WORKDATE] [nvarchar](8) NULL,
[CATSHOURS] [decimal](7, 3) NULL,
[APDAT] [nvarchar](8) NULL,
[LAETM] [nvarchar](6) NULL,
[CATS_STATUS] [nvarchar](2) NULL,
[APPR_STATUS] [nvarchar](2) NULL
)
END
-- Row-by-row load of CATSDB into TEMP.
--   * A row without REFCOUNTER is an original record and is inserted into TEMP.
--   * A row with REFCOUNTER is a change: the chain of REFCOUNTERs is followed
--     back through TBL_COUNTER to the original record, and that TEMP row is
--     overwritten with the new values while keeping its original COUNTER.
-- Every COUNTER/REFCOUNTER pair seen is remembered in TBL_COUNTER so the chain
-- can still be resolved in later runs that contain only part of it.
DECLARE @COUNTER      nvarchar(12),
        @REFCOUNTER   nvarchar(12),
        @PERNR        nvarchar(8),
        @WORKDATE     nvarchar(8),
        @CATSHOURS    decimal(7, 3),
        @APDAT        nvarchar(8),
        @LAETM        nvarchar(6),
        @CATS_STATUS  nvarchar(2),
        @APPR_STATUS  nvarchar(2),
        @orig_counter nvarchar(12);

-- LOCAL FAST_FORWARD: the cursor is only read forward and never updated through.
-- ORDER BY makes the processing order deterministic; a referenced row has to be
-- processed before the rows that reference it.
-- NOTE(review): assumes a change always has a higher COUNTER than the row it
-- references (true for the sample data) - confirm for the real source.
DECLARE curs CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        [COUNTER],
        REFCOUNTER,
        PERNR,
        WORKDATE,
        CATSHOURS,
        APDAT,
        LAETM,
        CATS_STATUS,
        APPR_STATUS
    FROM
        CATSDB
    ORDER BY
        [COUNTER];

OPEN curs;

FETCH NEXT FROM curs INTO
    @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
    @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Remember the COUNTER -> REFCOUNTER link for chain resolution.
    IF NOT EXISTS (SELECT * FROM TBL_COUNTER WHERE [COUNTER] = @COUNTER)
    BEGIN
        INSERT INTO TBL_COUNTER ([COUNTER], REFCOUNTER)
        VALUES (@COUNTER, @REFCOUNTER);
    END

    IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)
    BEGIN
        -- If REFCOUNTER is populated, get the original COUNTER value, then update
        -- that row with the new values. Otherwise insert a new record.
        IF @REFCOUNTER <> '' AND @REFCOUNTER IS NOT NULL
        BEGIN
            -- Reset first: if the lookup finds nothing, the value left over from
            -- the previous iteration must not be reused (it would overwrite an
            -- unrelated record).
            SET @orig_counter = NULL;

            -- Start at the referenced row and keep stepping to the row *it*
            -- references until a row without REFCOUNTER (the original) is reached.
            WITH n ([COUNTER], REFCOUNTER) AS
            (
                SELECT
                    cnt.[COUNTER],
                    cnt.REFCOUNTER
                FROM
                    TBL_COUNTER AS cnt
                WHERE
                    cnt.[COUNTER] = @REFCOUNTER
                UNION ALL
                SELECT
                    parent.[COUNTER],
                    parent.REFCOUNTER
                FROM
                    TBL_COUNTER AS parent
                    INNER JOIN n ON parent.[COUNTER] = n.REFCOUNTER
            )
            SELECT @orig_counter = [COUNTER]
            FROM n
            WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

            -- When no original was found @orig_counter is NULL and this
            -- statement updates nothing.
            UPDATE TEMP
            SET
                [REFCOUNTER]   = @REFCOUNTER,
                [PERNR]        = @PERNR,
                [WORKDATE]     = @WORKDATE,
                [CATSHOURS]    = @CATSHOURS,
                [APDAT]        = @APDAT,
                [LAETM]        = @LAETM,
                [CATS_STATUS]  = @CATS_STATUS,
                [APPR_STATUS]  = @APPR_STATUS
            WHERE [COUNTER] = @orig_counter;
        END
        ELSE
        BEGIN
            INSERT INTO TEMP
                ([COUNTER], [REFCOUNTER], [PERNR], [WORKDATE], [CATSHOURS],
                 [APDAT], [LAETM], [CATS_STATUS], [APPR_STATUS])
            VALUES
                (@COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
                 @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS);
        END
    END

    FETCH NEXT FROM curs INTO
        @COUNTER, @REFCOUNTER, @PERNR, @WORKDATE, @CATSHOURS,
        @APDAT, @LAETM, @CATS_STATUS, @APPR_STATUS;
END

CLOSE curs;
DEALLOCATE curs;
我縮短了它並為你們所有人創建了表格,以便能夠看到發生了什么。 預期的結果是
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| COUNTER | REFCOUNTER | PERNR | WORKDATE | CATSHOURS | APDAT | LAETM | CATS_STATUS | APPR_STATUS |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
| 000421692670 | NULL | 00000071 | 20190114 | 6.00 | 20190204 | 174541 | 30 | 30 |
| 000421692671 | NULL | 00000071 | 20190114 | 3.00 | 20190204 | 174541 | 30 | 30 |
| 000421692672 | 000421692672 | 00000071 | 20190115 | 0.00 | 20190115 | 111007 | 30 | 30 |
| 000421692673 | 000421692673 | 00000071 | 20190115 | 0.00 | 20190115 | 111007 | 30 | 30 |
| 000429718015 | 000430154142 | 00000072 | 20190313 | 4.50 | 20190315 | 164659 | 30 | 30 |
| 000429718016 | NULL | 00000072 | 20190313 | 1.50 | 20190315 | 164659 | 30 | 30 |
| 000429718017 | NULL | 00000072 | 20190313 | 1.0 | 20190315 | 164659 | 30 | 30 |
| 000430154143 | 000430154143 | 00000072 | 20190313 | 2.50 | 20190315 | 164659 | 30 | 30 |
| 000429774620 | 000429774620 | 00000152 | 20190314 | 2.00 | 00000000 | 000000 | 60 | 20 |
| 000429774619 | 000429802105 | 00000152 | 20190314 | 5.00 | 20190315 | 143857 | 30 | 30 |
+--------------+--------------+----------+----------+-----------+----------+--------+-------------+-------------+
我需要補充一下。 所以這有兩個階段。 第一階段是我將拉取 2019 年的所有數據以初始加載我的表。 然后每周,我將從原始源中提取數據以獲取新記錄和上次運行時更改的記錄。 所以我不會每周都有完整的鏈條。 需要有一種方法可以在沒有完整數據集的情況下返回原始計數器值,這就是為什么我有計數器表的原因。 我很抱歉沒有更清楚。 我忙於工作,無法像我計划的那樣專注於此。 我正在嘗試所有這些不同的技術。
我相信,以下查詢將幫助您開始,這是接近您目標的非常有效的方式。
它最初是為了在中央位置維護多台 SQL Server 的歷史信息而編寫的,並執行以下步驟;您必須在相應的腳本塊中加入/替換為您自己的表結構:
- 創建 temp 表
- 使用 OPENQUERY 通過 Linked Servers(來源)從多個服務器收集信息,並加載到 Temp 表中
- 在 Temp 表上創建索引
注意:請根據您的情況替換腳本
-- Set-based load of CATSDB into TEMP through the staging table #SrcTemp.
-- Steps: stage the source rows, normalise REFCOUNTER ('' / NULL -> '0'), index
-- the staging table, then for the UNCHANGED (REFCOUNTER = '0') and CHANGED
-- (REFCOUNTER <> '0') rows separately: refuse to run on duplicate keys,
-- update the rows already present in TEMP and insert the missing ones.
-- NOTE(review): assumes CATSDB and TEMP both carry the full column set listed
-- below - adjust the column lists to the real table structures.
CREATE TABLE #SrcTemp
(
    AENAM        nvarchar(12),
    AUTYP        nvarchar(2),
    AWART        nvarchar(4),
    BELNR        nvarchar(10),
    CATSHOURS    decimal(7, 3),
    CATSQUANTITY decimal(18, 3),
    CHARGE_HOLD  nvarchar(24),
    [COUNTER]    nvarchar(12),
    ERNAM        nvarchar(12),
    ERSDA        nvarchar(8),
    ERSTM        nvarchar(6),
    HRCOSTASG    nvarchar(1),
    LAEDA        nvarchar(8),
    LSTAR        nvarchar(6),
    LTXA1        nvarchar(40),
    MANDT        nvarchar(3),
    PERNR        nvarchar(8),
    RAPLZL       nvarchar(8),
    RAUFPL       nvarchar(10),
    REFCOUNTER   nvarchar(12),
    RNPLNR       nvarchar(12),
    SKOSTL       nvarchar(10),
    CATS_STATUS  nvarchar(2),
    SUPP3        nvarchar(10),
    WORKDATE     nvarchar(8),
    ZZOH_ORDER   nvarchar(24),
    APDAT        nvarchar(8),
    APNAM        nvarchar(12),
    LAETM        nvarchar(6),
    APPR_STATUS  nvarchar(2)
);

-- 1) Stage the source rows.
INSERT INTO #SrcTemp
    (AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA, ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT,
     PERNR, RAPLZL, RAUFPL, REFCOUNTER, RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS)
SELECT
    AENAM, AUTYP, AWART, BELNR, CATSHOURS, CATSQUANTITY, CHARGE_HOLD, [COUNTER], ERNAM, ERSDA, ERSTM, HRCOSTASG, LAEDA, LSTAR, LTXA1, MANDT,
    PERNR, RAPLZL, RAUFPL, REFCOUNTER, RNPLNR, SKOSTL, CATS_STATUS, SUPP3, WORKDATE, ZZOH_ORDER, APDAT, APNAM, LAETM, APPR_STATUS
FROM
    CATSDB;

-- 2) Normalise "no reference" to '0'. This has to run AFTER the rows are staged;
--    on the empty table it would change nothing and every REFCOUNTER = '0'
--    filter below would then match no rows.
UPDATE #SrcTemp SET REFCOUNTER = '0' WHERE REFCOUNTER = '' OR REFCOUNTER IS NULL;

-- 3) Index once the data is loaded and normalised, so it is built a single time.
CREATE CLUSTERED INDEX CLU_SrvTemp ON #SrcTemp ([COUNTER], REFCOUNTER);

-- Scope: UNCHANGED Records ====================================================
IF EXISTS
    (SELECT [COUNTER]
     FROM #SrcTemp
     WHERE REFCOUNTER = '0'
     GROUP BY [COUNTER]
     HAVING COUNT(*) > 1)
BEGIN
    RAISERROR ('Primary key violation occurred in "UNCHANGED" records processing block', 16, 1) WITH NOWAIT;
END
ELSE
BEGIN
    -- Unchanged records that already exist in TEMP: refresh the destination row
    -- from the staged row. Runs before the insert so new rows are not rewritten.
    UPDATE D
    SET  [AENAM] = S.AENAM, [AUTYP] = S.AUTYP, [AWART] = S.AWART, [BELNR] = S.BELNR
        ,[CATSHOURS] = S.CATSHOURS, [CATSQUANTITY] = S.CATSQUANTITY, [CHARGE_HOLD] = S.CHARGE_HOLD
        ,[ERNAM] = S.ERNAM, [ERSDA] = S.ERSDA, [ERSTM] = S.ERSTM, [HRCOSTASG] = S.HRCOSTASG
        ,[LAEDA] = S.LAEDA, [LSTAR] = S.LSTAR, [LTXA1] = S.LTXA1, [MANDT] = S.MANDT
        ,[PERNR] = S.PERNR, [RAPLZL] = S.RAPLZL, [RAUFPL] = S.RAUFPL, [REFCOUNTER] = S.REFCOUNTER
        ,[RNPLNR] = S.RNPLNR, [SKOSTL] = S.SKOSTL, [CATS_STATUS] = S.CATS_STATUS, [SUPP3] = S.SUPP3
        ,[WORKDATE] = S.WORKDATE, [ZZOH_ORDER] = S.ZZOH_ORDER, [APDAT] = S.APDAT, [APNAM] = S.APNAM
        ,[LAETM] = S.LAETM, [APPR_STATUS] = S.APPR_STATUS
    FROM TEMP AS D
    INNER JOIN #SrcTemp AS S
        ON S.[COUNTER] = D.[COUNTER] AND S.REFCOUNTER = D.REFCOUNTER
    WHERE S.REFCOUNTER = '0';

    -- Unchanged records that do not exist in TEMP yet.
    INSERT INTO TEMP ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
        ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
        ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
        ,[LAETM],[APPR_STATUS])
    SELECT S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
        , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
        , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
        , S.[LAETM], S.[APPR_STATUS]
    FROM #SrcTemp AS S
    LEFT JOIN TEMP AS D
        ON S.[COUNTER] = D.[COUNTER]
    WHERE S.REFCOUNTER = '0' AND D.[COUNTER] IS NULL;
END

-- Scope: CHANGED Records ======================================================
IF EXISTS
    (SELECT [COUNTER]
     FROM #SrcTemp
     WHERE NOT REFCOUNTER = '0'
     GROUP BY [COUNTER], REFCOUNTER
     HAVING COUNT(*) > 1)
BEGIN
    -- Severity 16, same as the UNCHANGED block; severity 10 is only informational.
    RAISERROR ('Primary key violation occurred in "CHANGED" records processing block', 16, 1) WITH NOWAIT;
END
ELSE
BEGIN
    -- Changed records that already exist in TEMP: refresh the destination row.
    UPDATE D
    SET  [AENAM] = S.AENAM, [AUTYP] = S.AUTYP, [AWART] = S.AWART, [BELNR] = S.BELNR
        ,[CATSHOURS] = S.CATSHOURS, [CATSQUANTITY] = S.CATSQUANTITY, [CHARGE_HOLD] = S.CHARGE_HOLD
        ,[ERNAM] = S.ERNAM, [ERSDA] = S.ERSDA, [ERSTM] = S.ERSTM, [HRCOSTASG] = S.HRCOSTASG
        ,[LAEDA] = S.LAEDA, [LSTAR] = S.LSTAR, [LTXA1] = S.LTXA1, [MANDT] = S.MANDT
        ,[PERNR] = S.PERNR, [RAPLZL] = S.RAPLZL, [RAUFPL] = S.RAUFPL, [REFCOUNTER] = S.REFCOUNTER
        ,[RNPLNR] = S.RNPLNR, [SKOSTL] = S.SKOSTL, [CATS_STATUS] = S.CATS_STATUS, [SUPP3] = S.SUPP3
        ,[WORKDATE] = S.WORKDATE, [ZZOH_ORDER] = S.ZZOH_ORDER, [APDAT] = S.APDAT, [APNAM] = S.APNAM
        ,[LAETM] = S.LAETM, [APPR_STATUS] = S.APPR_STATUS
    FROM TEMP AS D
    INNER JOIN #SrcTemp AS S
        ON S.[COUNTER] = D.[COUNTER] AND S.REFCOUNTER = D.REFCOUNTER
    WHERE NOT S.REFCOUNTER = '0';

    -- Changed records that do not exist in TEMP yet.
    INSERT INTO TEMP ([AENAM],[AUTYP],[AWART],[BELNR],[CATSHOURS],[CATSQUANTITY],[CHARGE_HOLD],[COUNTER],[ERNAM]
        ,[ERSDA],[ERSTM],[HRCOSTASG],[LAEDA],[LSTAR],[LTXA1],[MANDT],[PERNR],[RAPLZL],[RAUFPL]
        ,[REFCOUNTER],[RNPLNR],[SKOSTL],[CATS_STATUS],[SUPP3],[WORKDATE],[ZZOH_ORDER],[APDAT],[APNAM]
        ,[LAETM],[APPR_STATUS])
    SELECT S.[AENAM], S.[AUTYP], S.[AWART], S.[BELNR], S.[CATSHOURS], S.[CATSQUANTITY], S.[CHARGE_HOLD], S.[COUNTER], S.[ERNAM]
        , S.[ERSDA], S.[ERSTM], S.[HRCOSTASG], S.[LAEDA], S.[LSTAR], S.[LTXA1], S.[MANDT], S.[PERNR], S.[RAPLZL], S.[RAUFPL]
        , S.[REFCOUNTER], S.[RNPLNR], S.[SKOSTL], S.[CATS_STATUS], S.[SUPP3], S.[WORKDATE], S.[ZZOH_ORDER], S.[APDAT], S.[APNAM]
        , S.[LAETM], S.[APPR_STATUS]
    FROM #SrcTemp AS S
    LEFT JOIN TEMP AS D
        ON S.[COUNTER] = D.[COUNTER] AND S.REFCOUNTER = D.REFCOUNTER
    WHERE (NOT S.REFCOUNTER = '0') AND D.[COUNTER] IS NULL;
END

DROP TABLE #SrcTemp;
看起來它可以通過一個簡單的遞歸查詢來完成。 擁有合適的索引也很重要。
樣本數據
這就是您的示例數據在問題中的樣子。 只有幾個相關的列。 最好包括幾組/一系列更改,而不僅僅是一組。 只有這些示例數據會使您更難驗證所提供的解決方案是否正確。
+-----------+---------------------+-----------+------------+
| BELNR | CHARGE_HOLD | COUNTER | REFCOUNTER |
+-----------+---------------------+-----------+------------+
| 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL |
| 417549506 | T4-GS023-ABC2 | 420203329 | 420202428 |
| 417553156 | JGS023001 0010#* | 420206979 | 420203329 |
| 417557221 | T4-GS023-ABC2 | 420211044 | 420206979 |
| 417581675 | JGS023001 0010#* | 420235498 | 420211044 |
| 417677969 | JGS023001 0010#* | 420331792 | 420235498 |
+-----------+---------------------+-----------+------------+
查詢的主要遞歸部分
-- Expands every change chain: one output row per (chain start, chain member).
-- Original* columns repeat the chain's starting row (REFCOUNTER IS NULL),
-- New* columns hold the member reached at depth Lvl (1 = the starting row itself).
WITH CTE
    (Lvl,
     OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
     NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER)
AS
(
    -- Anchor: the starting rows; at level 1 "original" and "new" are the same row.
    SELECT
        1,
        root.BELNR, root.CHARGE_HOLD, root.[COUNTER], root.REFCOUNTER,
        root.BELNR, root.CHARGE_HOLD, root.[COUNTER], root.REFCOUNTER
    FROM CATSDB AS root
    WHERE root.REFCOUNTER IS NULL

    UNION ALL

    -- Step: the row whose REFCOUNTER points at the member reached so far.
    SELECT
        reached.Lvl + 1,
        reached.OriginalBELNR, reached.OriginalCHARGE_HOLD, reached.OriginalCOUNTER, reached.OrginalREFCOUNTER,
        nxt.BELNR, nxt.CHARGE_HOLD, nxt.[COUNTER], nxt.REFCOUNTER
    FROM CTE AS reached
    INNER JOIN CATSDB AS nxt
        ON nxt.REFCOUNTER = reached.NewCOUNTER
)
SELECT
    Lvl,
    OriginalBELNR, OriginalCHARGE_HOLD, OriginalCOUNTER, OrginalREFCOUNTER,
    NewBELNR, NewCHARGE_HOLD, NewCOUNTER, NewREFCOUNTER
FROM CTE;
中間結果
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR | NewCHARGE_HOLD | NewCOUNTER | NewREFCOUNTER |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
| 1 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL |
| 2 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417549506 | T4-GS023-ABC2 | 420203329 | 420202428 |
| 3 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417553156 | JGS023001 0010#* | 420206979 | 420203329 |
| 4 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417557221 | T4-GS023-ABC2 | 420211044 | 420206979 |
| 5 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417581675 | JGS023001 0010#* | 420235498 | 420211044 |
| 6 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417677969 | JGS023001 0010#* | 420331792 | 420235498 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+
您可以看到,我們從鏈的起始行(RefCounter is NULL 的行)出發,找到了整個更改鏈。
現在我們只需要選擇具有最后更改的行,即每個起始行的最大Lvl
。 一種方法是使用具有適當分區的ROW_NUMBER
函數。
最終查詢
-- Returns, for every chain start (REFCOUNTER IS NULL), the last change in its
-- chain: Original* columns describe the starting row, New* the latest member.
WITH
CTE
AS
(
    -- Anchor: chain starting rows (level 1, original = new).
    SELECT
        1 AS Lvl,
        CATSDB.BELNR AS OriginalBELNR,
        CATSDB.CHARGE_HOLD AS OriginalCHARGE_HOLD,
        CATSDB.[COUNTER] AS OriginalCOUNTER,
        CATSDB.REFCOUNTER AS OrginalREFCOUNTER,
        CATSDB.BELNR AS NewBELNR,
        CATSDB.CHARGE_HOLD AS NewCHARGE_HOLD,
        CATSDB.[COUNTER] AS NewCOUNTER,
        CATSDB.REFCOUNTER AS NewREFCOUNTER
    FROM
        CATSDB
    WHERE
        REFCOUNTER IS NULL

    UNION ALL

    -- Step: rows whose REFCOUNTER points at the member reached so far.
    SELECT
        CTE.Lvl + 1 AS Lvl,
        CTE.OriginalBELNR,
        CTE.OriginalCHARGE_HOLD,
        CTE.OriginalCOUNTER,
        CTE.OrginalREFCOUNTER,
        CATSDB.BELNR AS NewBELNR,
        CATSDB.CHARGE_HOLD AS NewCHARGE_HOLD,
        CATSDB.[COUNTER] AS NewCOUNTER,
        CATSDB.REFCOUNTER AS NewREFCOUNTER
    FROM
        CATSDB
        INNER JOIN CTE ON CATSDB.REFCOUNTER = CTE.NewCOUNTER
)
,CTE_rn
AS
(
    SELECT
        *
        -- Deepest level first. NewCOUNTER breaks ties so the pick stays
        -- deterministic when two rows reference the same predecessor.
        -- NOTE(review): assumes the higher COUNTER is the later change - confirm.
        ,ROW_NUMBER() OVER (PARTITION BY OriginalCOUNTER ORDER BY Lvl DESC, NewCOUNTER DESC) AS rn
    FROM CTE
)
SELECT *
FROM CTE_rn
WHERE rn = 1
-- Chains deeper than 100 levels need the hint below (it removes the default
-- recursion limit, so only enable it when the data cannot contain cycles):
--OPTION (MAXRECURSION 0)
;
如果您可以擁有超過 100 的鏈,則應向查詢添加OPTION (MAXRECURSION 0)
,因為默認情況下 SQL Server 將遞歸深度限制為 100。
結果
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| Lvl | OriginalBELNR | OriginalCHARGE_HOLD | OriginalCOUNTER | OrginalREFCOUNTER | NewBELNR | NewCHARGE_HOLD | NewCOUNTER | NewREFCOUNTER | rn |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
| 6 | 417548605 | T4-GS023ABC2 0150#* | 420202428 | NULL | 417677969 | JGS023001 0010#* | 420331792 | 420235498 | 1 |
+-----+---------------+---------------------+-----------------+-------------------+-----------+---------------------+------------+---------------+----+
效率
為了讓它有效地工作,我們需要在REFCOUNTER
列上有一個索引。 此外,查詢假定REFCOUNTER
為 NULL,而不是''
。 如果您混合使用 NULL 和空字符串,請統一您的數據,否則索引將無用。 這個索引是你需要的最低限度。
理想情況下,您應該在REFCOUNTER
列上有一個 CLUSTERED 索引,因為查詢總是從表中選擇所有列。
-- Clusters CATSDB on REFCOUNTER, the column the recursive join looks rows up by.
CREATE CLUSTERED INDEX [IX_RefCounter]
    ON [dbo].[CATSDB] ([REFCOUNTER] ASC);
如果您無法更改原始表的索引,我建議將所有數百萬行復制到臨時表中,並為該臨時表創建此聚集索引。
我對這個聚集索引有一個很好的計划。
您可以采取一些措施來提高性能:
將 COUNTER 和 REFCOUNTER 從 nvarchar 轉換為數據類型 int,對 int 的操作比字符快得多。 不要使用游標,您仍然可以使用 while 循環一次處理一條記錄。
-- Cursor-free skeleton that visits CATSDB one record at a time in ascending
-- COUNTER order. Assumes COUNTER has been converted to int (see above) and
-- that every COUNTER is greater than 0.
DECLARE @COUNTER int = 0;

WHILE (1 = 1)
BEGIN
    -- Move on to the smallest COUNTER above the one just processed.
    SELECT @COUNTER = MIN([COUNTER])
    FROM CATSDB
    WHERE [COUNTER] > @COUNTER;

    -- An aggregate without GROUP BY always returns exactly one row, so
    -- @@ROWCOUNT cannot signal the end; MIN yields NULL when nothing is left.
    IF @COUNTER IS NULL
        BREAK;

    /* SELECT RECORD FOR THIS @COUNTER FROM CATSDB */
    /* DO THE PROCESSING FOR THIS RECORD */
END
有一種稱為 sql Bulk copy 的方法,我不知道它會幫助解決您的問題,但請嘗試一下。
執行此操作的最高效方法是通過 BCP。 https://docs.microsoft.com/en-us/sql/tools/bcp-utility?view=sql-server-2017 。
您可以將所有數據 BCP 到 SQL Server 中的臨時表中,然后運行插入和更新。 此外,在檢查不存在的記錄以確定這是插入還是更新時,“IF NOT EXISTS (SELECT * FROM TEMP WHERE [COUNTER] = @COUNTER)”非常昂貴。
執行此操作的更高效方法的示例:(表名稱TBL_SOURCE 、 TBL_DESTINATION 、 #TBL_UPDATES和#TBL_INSERTS )
-- Splits the source rows into "new" and "already present" sets by COUNTER.
-- Only the source columns (S.*) are selected: selecting * across the join
-- would produce duplicate column names (e.g. COUNTER from both tables),
-- which SELECT ... INTO rejects.

-- Source rows with no matching COUNTER in the destination -> to be inserted.
SELECT S.*
INTO #TBL_INSERTS
FROM TBL_SOURCE AS S
WHERE NOT EXISTS (SELECT * FROM TBL_DESTINATION AS D WHERE D.COUNTER = S.COUNTER);

-- Source rows whose COUNTER already exists in the destination -> to be updated.
SELECT S.*
INTO #TBL_UPDATES
FROM TBL_SOURCE AS S
WHERE EXISTS (SELECT * FROM TBL_DESTINATION AS D WHERE D.COUNTER = S.COUNTER);
需要更新的記錄將被收集到 #TBL_UPDATES 中,需要插入的記錄將被收集到 #TBL_INSERTS 中。
僅憑少量樣本數據和給定的輸出,我們的腳本不可能做到 100% 正確和優化;而這里的關注點是需要更新數百萬行數據。
我對我的腳本充滿信心,在完全理解需求后,它可以朝那個方向改進。
首先我想知道為什么數據類型是nvarchar
,如果可能的話讓它變成varchar,int,datetime
。
如果您可以更改數據類型,那么它會對性能產生影響。
此外,表中沒有標識(identity)列;應該有一個標識列,並把它作為 Clustered Index。
從性能的角度來看,這兩點很重要。
所以在我的例子中,
-- CATSDB with a surrogate identity key: clustered primary key on id, plus a
-- nonclustered index on (REFCOUNTER, COUNTER).
CREATE TABLE CATSDB
(
    id          int IDENTITY,
    [COUNTER]   nvarchar(12),
    REFCOUNTER  nvarchar(12),
    PERNR       nvarchar(8),
    WORKDATE    nvarchar(8),
    CATSHOURS   decimal(7, 3),
    APDAT       nvarchar(8),
    LAETM       nvarchar(6),
    CATS_STATUS nvarchar(2),
    APPR_STATUS nvarchar(2),
    -- Declared with the table instead of a follow-up ALTER TABLE.
    CONSTRAINT PK_CATSDB_ID PRIMARY KEY CLUSTERED (id)
);

CREATE NONCLUSTERED INDEX FICATSDB_REFCOUNTER
    ON CATSDB (REFCOUNTER, [COUNTER]);
-- Builds #TEMP: for every CATSDB row (UpdateID) the values of the last row
-- reachable from it by following REFCOUNTER links forward (FINDID).
IF OBJECT_ID('tempdb..#TEMP', 'U') IS NOT NULL
    DROP TABLE #TEMP;

CREATE TABLE #TEMP
(
    -- Key on UpdateID: there is exactly one row per starting id (GROUP BY id
    -- below). FINDID cannot be the key, because every row of a chain resolves
    -- to the same last row and would raise a primary key violation.
    UpdateID INT NOT NULL
        PRIMARY KEY,
    FINDID INT NOT NULL,
    [COUNTER] [NVARCHAR](12) NOT NULL,
    [REFCOUNTER] [NVARCHAR](12) NULL,
    [PERNR] [NVARCHAR](8) NULL,
    [WORKDATE] [NVARCHAR](8) NULL,
    [CATSHOURS] [DECIMAL](7, 3) NULL,
    [APDAT] [NVARCHAR](8) NULL,
    [LAETM] [NVARCHAR](6) NULL,
    [CATS_STATUS] [NVARCHAR](2) NULL,
    [APPR_STATUS] [NVARCHAR](2) NULL
);

WITH CTE
     AS (
     -- Anchor: every row starts a walk from itself.
     SELECT a.id,
            a.[COUNTER],
            a.REFCOUNTER,
            a.id AS Findid
     FROM dbo.CATSDB A
     UNION ALL
     -- Step: rows that reference the row reached so far; the starting id is
     -- carried along, Findid becomes the id of the newly reached row.
     SELECT b.id,
            a.[COUNTER],
            a.[refCOUNTER],
            a.id
     FROM dbo.CATSDB A
          INNER JOIN CTE b ON(a.REFCOUNTER = b.[COUNTER])
     WHERE a.id >= b.Findid),
     CTE1
     AS (
     -- Highest id reached per starting row.
     -- NOTE(review): treats the highest id as the latest change - confirm that
     -- rows are loaded in chronological order.
     SELECT id,
            MAX(Findid) Findid
     FROM CTE
     GROUP BY id)
     INSERT INTO #TEMP
     (UpdateID,
      FINDID,
      [COUNTER],
      [REFCOUNTER],
      [PERNR],
      [WORKDATE],
      [CATSHOURS],
      [APDAT],
      [LAETM],
      [CATS_STATUS],
      [APPR_STATUS]
     )
            SELECT c1.ID,
                   c1.FINDID,
                   a.COUNTER,
                   a.REFCOUNTER,
                   a.PERNR,
                   a.WORKDATE,
                   a.CATSHOURS,
                   a.APDAT,
                   a.LAETM,
                   a.CATS_STATUS,
                   a.APPR_STATUS
            FROM dbo.CATSDB A
                 INNER JOIN CTE1 c1 ON a.id = c1.Findid;
-- Applies the values collected in #TEMP to CATSDB inside a transaction.
-- As written this is a dry run: the transaction is always rolled back after
-- the result has been displayed.
BEGIN TRY
    BEGIN TRAN;

    UPDATE A
      SET
          [REFCOUNTER] = b.REFCOUNTER,
          [PERNR] = b.PERNR,
          [WORKDATE] = b.WORKDATE,
          [CATSHOURS] = b.CATSHOURS,
          [APDAT] = b.APDAT,
          [LAETM] = b.LAETM,
          [CATS_STATUS] = b.CATS_STATUS,
          [APPR_STATUS] = b.APPR_STATUS
    FROM CATSDB A
         INNER JOIN #TEMP B ON a.id = b.UpdateID;

    -- this is only test query
    SELECT c1.UpdateID AS UpdateID,
           a.*
    FROM dbo.CATSDB A
         INNER JOIN #TEMP c1 ON a.id = c1.Findid;

    -- Dry run: replace ROLLBACK with COMMIT to keep the changes.
    IF(@@trancount > 0)
        ROLLBACK; -- commit
END TRY
BEGIN CATCH
    IF(@@trancount > 0)
        ROLLBACK;
    -- Re-raise the original error; otherwise a failed update is rolled back
    -- without the caller ever learning about it.
    THROW;
END CATCH;
#Temp should be permanent table.
IMO,您的表非常需要身份列,該列應該是身份和聚集索引。
你可以嘗試,你可以改變它。
REFCOUNTER,COUNTER
應該是非聚集索引。
只有在優化查詢並在索引上方使用適當的 PLAN 之后,才能提高性能。
適當的計划:您應該使用Recursive or RBAR
並一次更新數百萬條記錄,還是應該Batch update
?
您可以首先使用Rollback
測試包含數百萬行的腳本。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.