简体   繁体   中英

Split a delimited string into a table with multiple rows and columns

Please could you assist i am new to SQL and am faced with the scenario below.I have used google and tried to find a solution but have failed.

I have a temporary table named TEMP with a single column named results and rows depending on however long the csv string may be. When you SELECT * FROM #TEMP (The temporary table) it returns data like below:

results

88.47,1,263759,10.00|303.53,2,264051,13.00|147.92,3,264052,6.00|43.26,4,268394,10.00| 127.7,5,269229,4.00|

Please use link below to view what results look like directly from the database:
http://design.northdurban.com/DatabaseResult.png

I need a solution that reads this data from the existing temporary table and insert it into another temporary table with rows and columns like in the link below for example:

The required output is displayed in the link below

http://design.northdurban.com/capture.png

Please could you help as i am sure this post will assist many other users as i have not found any existing solution.

First convert the string to rows using the demiliter |

DECLARE @str VARCHAR(max)='88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|'

SELECT Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)')))
FROM   (SELECT Cast ('<M>' + Replace(@str, '|', '</M><M>') + '</M>' AS XML) AS Data) AS A
       CROSS APPLY Data.nodes ('/M') AS Split(a) 

then convert the result to different column using parsename trick

SELECT Id,c1,c2,c3
FROM  (SELECT Id=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 4), ';', '.'),
              C1=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 3), ';', '.'),
              c2=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 2), ';', '.'),
              c3=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 1), ';', '.')
       FROM   (SELECT Cast ('<M>' + Replace(@str, '|', '</M><M>') + '</M>' AS XML) AS Data) AS A
              CROSS APPLY Data.nodes ('/M') AS Split(a)) a
WHERE  id IS NOT NULL 

SQLFIDDLE DEMO

Update: To have a better performance try this.

SELECT c1,c2,c3,c4
FROM   (SELECT C1=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 4), ';', '.'),
               C2=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 3), ';', '.'),
               C3=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 2), ';', '.'),
               C4=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 1), ';', '.')
        FROM   (SELECT Split.a.value('.', 'VARCHAR(100)') col
                FROM   (SELECT Cast ('<M>' + Replace(@str, '|', '</M><M>') + '</M>' AS XML) AS Data) AS A
                       CROSS APPLY Data.nodes ('/M') AS Split(a))v) a
WHERE  c1 IS NOT NULL; 

Update2: To parse more than one row from the table use this code.

Sample table with data

create table #test(string varchar(8000))
insert into #test values
('88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|'),
('88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|')

Query

SELECT c1,c2,c3,c4
FROM   (SELECT C1=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 4), ';', '.'),
               C2=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 3), ';', '.'),
               C3=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 2), ';', '.'),
               C4=Replace(Parsename(Replace(Replace(col, '.', ';'), ',', '.'), 1), ';', '.')
        FROM   (SELECT Split.a.value('.', 'VARCHAR(100)') col
                FROM   (SELECT Cast ('<M>' + Replace(string, '|', '</M><M>') + '</M>' AS XML)
                 AS Data from #test) AS A
                       CROSS APPLY Data.nodes ('/M') AS Split(a))v) a
WHERE  c1 IS NOT NULL; 

This will only work if you you have 4 columns. In this situation you can do the following

SELECT REPLACE(PARSENAME(REPLACE(REPLACE(ColumnName, '.', '~'), ',', '.'), 4), '~', '.'),
 REPLACE(PARSENAME(REPLACE(REPLACE(ColumnName, '.', '~'), ',', '.'), 3), '~', '.'),
 REPLACE(PARSENAME(REPLACE(REPLACE(ColumnName, '.', '~'), ',', '.'), 2), '~', '.'),
 REPLACE(PARSENAME(REPLACE(REPLACE(ColumnName, '.', '~'), ',', '.'), 1), '~', '.')
From #TEMP

You can write a table value function to parse the string like this:

CREATE FUNCTION dbo.parseData ( @stringToSplit VARCHAR(MAX) )
RETURNS
    @return TABLE (ID int, Column1 real, Column2 int, Column3 int, Column4 real)
AS
BEGIN

    DECLARE @char char;
    DECLARE @len int = LEN(@stringToSplit);    

    DECLARE @buffer varchar(50) = '';

    DECLARE @field int = 1;

    DECLARE @Column1 real
    DECLARE @Column2 int
    DECLARE @Column3 int
    DECLARE @Column4 real

    DECLARE @row int = 1

    DECLARE @i int = 1;
    WHILE @i <= @len BEGIN

        SELECT @char = SUBSTRING(@stringToSplit, @i, 1)

        IF @char = ','
        BEGIN
            IF @field = 1
                SET @Column1 = CONVERT(real, @buffer);
            ELSE IF @field = 2
                SET @Column2 = CONVERT(int, @buffer);
            ELSE IF @field = 3
                SET @Column3 = CONVERT(int, @buffer);    
            SET @buffer = '';
            SET @field = @field + 1
        END
        ELSE IF @char = '|'
        BEGIN
            SET @Column4 = CONVERT(real, @buffer);
            INSERT INTO @return (ID, Column1, Column2, Column3, Column4)
            VALUES (@row, @Column1, @Column2, @Column3, @Column4);
            SET @buffer = '';
            SET @row = @row + 1
            SET @field = 1
        END
        ELSE
        BEGIN
            SET @buffer = @buffer + @char
        END

        SET @i = @i + 1;
    END

    RETURN
END
GO

And then call that function like this:

SELECT Col1 = '88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|'
INTO #Temp1;

INSERT INTO #Temp1
VALUES ('88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|')

SELECT data.*
INTO #Temp2
FROM #Temp1 CROSS APPLY parseData(#Temp1.Col1) as data

SELECT *
FROM #Temp2

DROP TABLE #Temp1
DROP TABLE #Temp2

Performance:

So I ran a performance test of this technique against the technique described by NoDisplayName. Over 10,000 iterations my technique took 13,826 and NoDisplayName's took 36,176 so mine only takes 38% of the time NoDisplayName's does.

To test this I used an Azure database and ran the following script.

-- First two queries to check the results are the same.
-- Note the Parsename technique returns strings rather than reals which is why
-- the last column has .00 at the end of the numbers in the Parsename tecnique.
DECLARE @str VARCHAR(max)='88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.01|'

SELECT c1,c2,c3, c4
    FROM  (SELECT C1=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 4), ';', '.'),
                  C2=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 3), ';', '.'),
                  C3=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 2), ';', '.'),
                  C4=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 1), ';', '.')
           FROM   (SELECT Cast ('<M>' + Replace(@str, '|', '</M><M>') + '</M>' AS XML) AS Data) AS A
                  CROSS APPLY Data.nodes ('/M') AS Split(a)) a
    WHERE  c1 IS NOT NULL;

SELECT *
FROM dbo.parseData(@str)
GO

-- Now lets time the Parsename method over 10,000 itterations
SET NOCOUNT ON;

DECLARE @str VARCHAR(max)='88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|'

DECLARE @i int = 0
declare @table table (c1 decimal, c2 int, c3 int, c4 decimal)

DECLARE @Start datetime = GETDATE();

while @i < 1000
begin

    INSERT INTO @table
    SELECT c1,c2,c3, c4
    FROM  (SELECT C1=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 4), ';', '.'),
                  C2=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 3), ';', '.'),
                  C3=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 2), ';', '.'),
                  C4=Replace(Parsename(Replace(Replace(Rtrim(Ltrim(Split.a.value('.', 'VARCHAR(100)'))), '.', ';'), ',', '.'), 1), ';', '.')
           FROM   (SELECT Cast ('<M>' + Replace(@str, '|', '</M><M>') + '</M>' AS XML) AS Data) AS A
                  CROSS APPLY Data.nodes ('/M') AS Split(a)) a
    WHERE  c1 IS NOT NULL;

    DELETE FROM @table;

    set @i = @i + 1;
end

DECLARE @End datetime = GETDATE()
PRINT CONVERT(nvarchar(50),@Start,126) + ' - ' + convert(nvarchar(50),@End,126) + ' - ' + convert(nvarchar(50), DATEDIFF(ms, @start, @end))
GO

-- Now the my technique over 10,000 itterations
SET NOCOUNT ON;

DECLARE @str VARCHAR(max)='88.47,1,263759,10.00| 303.53,2,264051,13.00| 147.92,3,264052,6.00| 43.26,4,268394,10.00| 127.7,5,269229,4.00|'

DECLARE @i int = 0
declare @table table (c1 decimal, c2 int, c3 int, c4 decimal)

DECLARE @Start datetime = GETDATE();

while @i < 1000
begin

    INSERT INTO @table
    SELECT *
    FROM dbo.parseData(@str)
    DELETE FROM @table;

    set @i = @i + 1;
end
DECLARE @End datetime = GETDATE()
PRINT CONVERT(nvarchar(50),@Start,126) + ' - ' + convert(nvarchar(50),@End,126) + ' - ' + convert(nvarchar(50), DATEDIFF(ms, @start, @end))
GO

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM