简体   繁体   中英

Load table into array and combine all duplicates- Excel VBA

I have many tables that need data combined. I have combined some of the tables into a test table to test the code. I am sorting the unique values in column 'B' az before running my code. It is very slow with only ~3500 records. The actual total is over 100,000 records. I'm curious to see if I can load the whole table to an array and perform the same functions, but I'm not sure if it is possible.

My table structure is:

Unique ID First Last Company etc.
A1 John
A1 Doe
A1 company1
A2 Jay Varnado
A3 Joe Snuffy
A3 M. company2

The desired outcome is:

Unique ID First Last Company etc.
A1 John Doe company1
A1 John Doe company1
A1 John Doe company1
A2 Jay Varnado
A3 Joe M. Snuffy company2
A3 Joe M. Snuffy company2
Dim cel As Range, rng As Range, r As Range
Dim arr(14) As String, temp As String
Dim i As Long, b As Long, j As Long, lRow As Long, lRec As Long, c As Long
Dim ii As Integer, v As Integer, col As Integer
Dim dict As Scripting.Dictionary
Dim str() As String
Dim BenchMark As Double

BenchMark = Timer

lRow = Sheet3.Cells(Rows.Count, 1).End(xlUp).Row
For c = 3 To lRow
    Debug.Print c
    Set cel = Sheet3.Range("B" & c)
    
    If Trim(cel.Offset(1, 0)) = Trim(cel.Value) Then
    
    'Determine range of like keys
        i = 1
        Do Until cel.Offset(i, 0).Value <> cel.Value
            i = i + 1
        Loop
        
        lRec = cel.Offset(i, 0).Row - 1
    
    'Compare data
        For i = 3 To 16
            ii = i - 3
            
        'Create rng and loop through each column
            Set rng = Sheet3.Range(Sheet3.Cells(c, i), Sheet3.Cells(lRec, i))
            Set dict = New Scripting.Dictionary 'CreateObject("Scripting.Dictionary")
            
            For Each r In rng
                If dict.Exists(r.Value) = False And Len(r.Value) > 0 Then
                    dict.Add r.Value, r.Value
                End If
            Next r
            
            
        'Add to string array
            'Debug.Print Split(Join(dict.Keys, "|"), "|")
            str = Split(Join(dict.Keys, ","), ",")
            arr(ii) = Join(str, ",")
            Set dict = Nothing
        Next i
            
    'Set range equal to array
        For j = cel.Row To lRec
            v = 0
            
            For col = 3 To 16
                Sheet3.Cells(j, col) = arr(v)
                Sheet3.Cells(j, col) = arr(v)
                v = v + 1
            Next col
        
        Next j
'Go to last cell in range
    c = lRec
    Else: GoTo NextCel
    End If
        
'Clear array
NextCel:
    On Error Resume Next
        'Debug.Print Join(arr, ",")
        Erase arr
    On Error GoTo 0
Next c

MsgBox ("Done in " & Timer - BenchMark)

End Sub

Assuming ID is in column B and output as single lines per ID to Sheet1 or with duplicates to Sheet2.

Option Explicit

Sub Process()
    
    Dim dict As Object, key
    Dim iLastRow As Long, n As Long, r As Long
    Dim c As Integer, s As String
    Dim arIn, arOut, t0 As Single: t0 = Timer

    Set dict = CreateObject("Scripting.Dictionary")

    iLastRow = Sheet3.Cells(Rows.Count, "B").End(xlUp).Row
    arIn = Sheet3.Range("A1").Resize(iLastRow, 16).Value2
    n = 0
    ' determine number of unique ids
    For r = 3 To iLastRow
        key = Trim(arIn(r, 2))
        If Len(key) > 0 Then
            If Not dict.exists(key) Then
                n = n + 1
                dict.Add key, n
             End If
        End If
    Next

    ' dimension output array and fill
    ReDim arOut(1 To n, 1 To 15)
    For r = 3 To iLastRow
        key = Trim(arIn(r, 2))
        n = dict(key)
        arOut(n, 1) = key
        ' concat columns
        For c = 3 To 16
            s = Trim(arIn(r, c))
            If Len(s) > 0 Then
                arOut(n, c - 1) = arOut(n, c - 1) & " " & s
            End If
        Next
    Next

    ' output to sheet1
    Sheet1.Range("A1").Resize(n, 15) = arOut
    MsgBox "Done in " & Format(Timer - t0, "0.0 secs")

    ' or with duplicates to sheet2
   For r = 3 To iLastRow
        key = Trim(arIn(r, 2))
        n = dict(key)
        Sheet2.Cells(r, 2) = key
        For c = 3 To 16
            Sheet2.Cells(r, c) = arOut(n, c - 1)
        Next
    Next
    
End Sub

This assumes the data is on Sheet1 starting in A1 .

Not sure how efficient it is.

Option Explicit

Sub Test()
Dim rngDst As Range
Dim dicIDs As Object
Dim dicData As Object
Dim arrData As Variant
Dim arrCols As Variant
Dim idxRow As Long
Dim idxCol As Long
Dim ky As Variant
Dim fld As Variant
Dim cnt As Long

    With Sheets("Sheet1").Range("A1").CurrentRegion
        arrCols = .Rows(1).Value
        arrData = .Offset(1).Resize(.Rows.Count - 1).Value
    End With

    Set dicIDs = CreateObject("Scripting.Dictionary")

    For idxRow = LBound(arrData, 1) To UBound(arrData, 1)

        ky = arrData(idxRow, 1)

        If dicIDs.exists(ky) Then
            Set dicData = dicIDs(ky)
            cnt = cnt + 1
        Else
            Set dicData = CreateBlankDic(arrCols)
        End If

        For idxCol = LBound(arrData, 2) To UBound(arrData, 2)

            fld = arrCols(1, idxCol)

            If arrData(idxRow, idxCol) <> "" Then
                dicData(fld) = arrData(idxRow, idxCol)
            End If

        Next idxCol

        Set dicIDs(ky) = dicData

    Next idxRow

    Set rngDst = Sheets("Sheet1").Range("A1").Offset(, UBound(arrCols, 2) + 2)
    
    rngDst.Resize(1, UBound(arrCols, 2)).Value = arrCols

    Set rngDst = rngDst.Offset(1).Resize(cnt, UBound(arrCols, 2))
    
    ReDim arrData(1 To cnt, 1 To UBound(arrCols, 2))
    
    cnt = 1

    For Each ky In dicIDs.keys
        Set dicData = dicIDs(ky)
        idxCol = 1
        For Each fld In dicData.keys
            arrData(cnt, idxCol) = dicData(fld)
            idxCol = idxCol + 1
        Next fld
        cnt = cnt + 1
    Next ky
       
    rngDst.Value = arrData
    
End Sub

Function CreateBlankDic(arrKeys, Optional BlankVal = "") As Object
Dim dic As Object
Dim idxCol As Long

    Set dic = CreateObject("Scripting.Dictionary")

    For idxCol = LBound(arrKeys, 2) To UBound(arrKeys, 2)
        dic(arrKeys(1, idxCol)) = BlankVal
    Next idxCol

    Set CreateBlankDic = dic
    
End Function

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM