简体   繁体   中英

Data Purging in SQL Server Table

I have the a Requirement where I have to Delete in batches in sql server, and also track the number of count affected in the end. My Sample Code is as Follows:

Declare @count int
Declare @deletecount int

set @count=0

While(1=1)
BEGIN

BEGIN TRY

BEGIN TRAN
DELETE TOP 1000 FROM --CONDITION

SET @COUNT = @COUNT+@@ROWCOUNT

IF (@@ROWCOUNT)=0
Break;

COMMIT

END CATCH

BEGIN CATCH

ROLLBACK;
END CATCH

END

set @deletecount=@COUNT

Above Code Works fine, but how to keep track of @deletecount if Rollback happens in one of the batch.

Some potential issues with the code:

  1. while loop never exits/breaks
declare @tbl table(foo bit); --deleting from an empty table
declare @COUNT int; 
declare @iteration int = 0;

While(1=1)
BEGIN
    --loop safety net
    if @iteration >= 100
    begin
        break --exit after 100 iterations
    end
    select @iteration = @iteration+1

    BEGIN TRY

        DELETE TOP (1000) FROM @tbl; --@tbl is empty

        SET @COUNT = @COUNT+@@ROWCOUNT;-- <-- this always sets @@rowcount = 1

        IF (@@ROWCOUNT)=0 -- ... so this can never be 0 --> will never exit the loop
        Break;

    END TRY
    BEGIN CATCH
    END CATCH

END --end while

select @iteration as iterations;
  1. Open transaction at the end of the batch
declare @tbl table(foo bit); --<-- empty table
declare @COUNT int;
declare @myrowcount int; 
declare @iteration int = 0;


While(1=1)
BEGIN
    --loop safety net
    if @iteration >= 100
    begin
        break --exit after 100 iterations
    end
    select @iteration = @iteration+1

    BEGIN TRY
        BEGIN TRAN --<-- transaction begins on each iteration
        
        DELETE TOP (1000) FROM @tbl; --@tbl is empty
        
        set @myrowcount = @@rowcount;
        
        SET @COUNT = @COUNT+@myrowcount;

        IF (@myrowcount)=0
        Break; --<-- if this breaks... transaction is neither committed nor rolledback

        COMMIT --<-- commited on each iteration, when no error
    END TRY
    BEGIN CATCH
        ROLLBACK --<-- or rolled back on each iteration error
    END CATCH

END --end while

select @iteration as iterations, @@trancount as open_transactions;

rollback transaction;
  1. loop keeps trying to delete the same erroneous batch (over and over again) and never exits
create table parent (id int primary key clustered);
create table child (parentid int references parent(id));
go

--some parents
insert into parent(id)
select top (200) row_number() over(order by @@spid)
from sys.all_objects;
--and a child
insert into child(parentid) values (75)
go

declare @COUNT int; 
declare @myrowcount int;
declare @errormsg nvarchar(max);
declare @iteration int = 0;


While(1=1)
BEGIN
    --loop safety net
    if @iteration >= 100
    begin
        break --exit after 100 iterations
    end
    select @iteration = @iteration+1

    BEGIN TRY

        DELETE TOP (10) t --changed the batch size..for the example
        from (select top (10000) * from parent order by id) as t;
        set @myrowcount = @@rowcount;
        
        SET @COUNT = @COUNT+@myrowcount;

        IF (@myrowcount)=0
        Break;

    END TRY
    BEGIN CATCH
        select @errormsg = isnull(@errormsg, '') + error_message();
    END CATCH

END --end while

select @iteration as iterations, @errormsg as errormsg;

--some parents have been deleted (8th batch kept failing, there is an fk to parent 75)
select *
from parent
go
drop table child
drop table parent
go

You could rectify all the above issues...(last one, to skip erroneous batches being a bit more "difficult"), by implementing logic in the where clause (eg. exclude rows which are referenced etc) according to your model. Trying to implement a straight deletion, without any rules, and skipping failures makes it a bit harder.

Just an example: when a batch fails, after 3 attempts, take another deletion route. Since each deletion is atomic, transactions are not really needed here (besides, if the batch is called from a parent module, a plain ROLLBACK in the batch would "invalidate" any transactions opened in the parent module, before the batch execution)

create table parent (id int primary key clustered);
create table child (parentid int references parent(id));
go

--some parents
insert into parent(id)
select top (200) row_number() over(order by @@spid)
from sys.all_objects;
--and two children
insert into child(parentid) values (75), (115) --, (9), (18) --: add 9 and 18 and nothing gets deleted, alternative route in the example does not work
go


declare @COUNT int;
declare @myrowcount int; 
declare @iteration int = 0;
declare @errormsg nvarchar(max);
declare @ierrorcnt int=0;

declare @deletedids table(id int);
declare @lastsuccessfullydeletedid int = 0;

select @COUNT = 0;
While(1=1)
BEGIN
    --loop safety net
    if @iteration >= 100
    begin
        break --exit after 100 iterations
    end
    select @iteration = @iteration+1

    BEGIN TRY
        
        --when 10 consecutive errors in a single iteration..just quit the loop
        if @ierrorcnt >= 10
        begin
            break;
        end
        
        BEGIN TRAN --<-- transaction begins on each iteration
        
        if @ierrorcnt >= 3 --when 3 consecutive errors in the iteration..try to bypass the current batch
        begin
    
            delete top (10) t
            output deleted.id into @deletedids(id)
            from (select top (10) * from (select top (2*10) * from parent where id > @lastsuccessfullydeletedid order by id) as a order by id desc) as t
    
            select @myrowcount = count(*), @lastsuccessfullydeletedid = max(id)
            from @deletedids;
            
            delete from @deletedids;
        end
        else
        begin
            DELETE TOP (10) FROM parent where id > @lastsuccessfullydeletedid;
            set @myrowcount = @@rowcount; 
        end
        
        SET @COUNT = @COUNT+@myrowcount;
        
        COMMIT --<-- commited on each iteration, when no error
        
        IF (@myrowcount)=0 and @ierrorcnt = 0
        Break;
        
        set @ierrorcnt = 0; --everything ok, set iteration error counter to 0
    END TRY
    BEGIN CATCH
        ROLLBACK --<-- or rolled back on each iteration error
        if @ierrorcnt = 0
        begin
            select @errormsg = isnull(@errormsg, '') +';'+ error_message();
        end
        set @ierrorcnt = @ierrorcnt + 1; --error, increase the iteration error counter
        
    END CATCH

END --end while

select @iteration as iterations, @@trancount as open_transactions;
select @iteration as iterations, @errormsg as errormsg;

--some parents have been deleted
select * /*2 failed batches, 20 rows left*/
from parent

select @COUNT as [count/deleted] --<--this is also the deleted count

go
drop table child
drop table parent
go

..without any errors, in your original code, @count = @deletedcount

declare @tbl table(foo int); --<-- empty table
insert into @tbl(foo)
select top (100000) row_number() over(order by @@spid)
from sys.all_objects as a
cross join sys.all_objects as b;

--batch 1000, max iterations for the @tbl count
declare @maxiterations int;
select @maxiterations = 1+count(*) / 1000
from @tbl

declare @COUNT int;
declare @deletedcount int;
declare @myrowcount int; 
declare @iteration int = 0;


select @COUNT = 0, @deletedcount = 0;

While(1=1)
BEGIN
    --loop safety net
    if @iteration >= @maxiterations
    begin
        break --exit after @maxiterations
    end
    select @iteration = @iteration+1;

    BEGIN TRY
        
        BEGIN TRAN
        
        DELETE TOP (1000) FROM @tbl
        where foo%5 = 0 ; 
        
        set @myrowcount = @@rowcount;
        
        SET @COUNT = @COUNT+@myrowcount;
        set @deletedcount = @deletedcount + @myrowcount;

       COMMIT 
       
        IF @myrowcount=0
        Break;
 
    END TRY
    BEGIN CATCH
        ROLLBACK 
    END CATCH

END --end while

select @iteration as iterations, @@trancount as open_transactions;
select @count as _count, @deletedcount as deletedcount;

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM