如何加快按兩個屬性分組的結構數組的總和 - macOS Swift

Question

我正在使用的代碼如下所示，但計算總和似乎非常慢 - 大約 20 秒。 有關如何加快速度的任何建議？

實際情況還要複雜一些，因為我需要創建一個結果對象，它包含所有原始屬性，並且其中的 count 已更新為總和。

/// A record identified by its `pCode`/`aCode` pair that carries a `count`.
///
/// Identity is defined by the two codes only: records with the same code pair
/// compare equal and hash alike, whatever their remaining fields hold.
struct PAData {
    let pCode: String
    let aCode: String
    let otherProperty1: String // Unique to pCode
    let otherProperty2: String // Unique to pCode
    let count: Int
}

// MARK: - Equatable

extension PAData: Equatable {
    /// Compares the `(pCode, aCode)` pair; every other field is ignored.
    static func == (lhs: PAData, rhs: PAData) -> Bool {
        return (lhs.pCode, lhs.aCode) == (rhs.pCode, rhs.aCode)
    }
}

// MARK: - Hashable

extension PAData: Hashable {
    /// Hashes exactly the fields that `==` compares, in the same order.
    func hash(into hasher: inout Hasher) {
        hasher.combine(pCode)
        hasher.combine(aCode)
    }
}

/// Groups `paData` by its `pCode`/`aCode` pair and appends one summary record
/// per pair, with `count` set to that pair's total, to `resultArray`.
///
/// `paData` and `resultArray` are not declared in this function; they are
/// read from / written to the enclosing scope.
func calcSum() {
    // Reduce the records to one representative per pCode/aCode pair.
    // Reported by the author as very fast (about 0.1 second).
    let unique = Set<PAData>(paData)

    // Sum the counts of each pCode/aCode group. Reported by the author as too
    // slow (about 20 seconds): the loop body re-scans all of `paData` once for
    // every element of `unique`.
    // Author's open question: is this only needed for pairs that have more than one record?
    for key in unique {
                
        // filter walks the whole array, map walks the matches, reduce walks the mapped counts.
        let sum = paData.filter({$0.pCode == key.pCode && $0.aCode == key.aCode}).map({$0.count}).reduce(0, +)

        let summary = PAData(pCode: key.pCode, aCode: key.aCode, count: sum)

        resultArray.append(summary)
    }
}

使用 Dictionary(grouping:by:) 進行分組似乎很快，但隨後合併結果又很慢——仍然需要大約 35 秒。

/// A record keyed by its `pCode`/`aCode` pair, with an associated `count`.
///
/// Equality and hashing look at the two codes only, so `count` never affects
/// whether two records are treated as the same set element or dictionary key.
struct PAData: Equatable, Hashable {
    let pCode: String
    let aCode: String
    let count: Int

    /// The two codes joined as `"<pCode>:<aCode>"`.
    ///
    /// Note: because the codes are simply concatenated around ":", two
    /// different code pairs can yield the same string if a code itself
    /// contains ":".
    var key: String {
        return "\(pCode):\(aCode)"
    }

    /// Two records are equal when both codes match; `count` is not compared.
    static func == (lhs: PAData, rhs: PAData) -> Bool {
        guard lhs.pCode == rhs.pCode else { return false }
        return lhs.aCode == rhs.aCode
    }

    /// Hashes exactly the fields that `==` compares.
    func hash(into hasher: inout Hasher) {
        hasher.combine(pCode)
        hasher.combine(aCode)
    }
}

    /// Second attempt: buckets `paData` by the composite `key`, totals each
    /// bucket, then attaches the totals to one representative record per
    /// pCode/aCode pair.
    func calcSum() {
        // Pairs a composite key with the total computed for that key.
        struct Item {
            let key: String
            let sum: Int
        }

        // Bucket the records by their composite key.
        let buckets = Dictionary(grouping: paData) { $0.key }

        // One Item per bucket, holding the summed facings of that bucket.
        let groupTotals = buckets.map { (entry) -> Item in
            let total = entry.value.reduce(0) { $0 + $1.facings }
            return Item(key: entry.key, sum: total)
        }

        // One representative record per pCode/aCode pair.
        let distinctRecords = Set<PAData>(paData)

        // Rebuild each representative with its group's total in place of its
        // own facings. Each record triggers a linear search through
        // `groupTotals`; the author reports this step as slow.
        let merged = distinctRecords.map { (record) -> PAData in
            let match = groupTotals.first { $0.key == record.key }
            let total = match?.sum ?? record.facings
            return PAData(
                brandCode: record.brandCode,
                assortmentCode: record.assortmentCode,
                productCode: record.productCode,
                productCategory: record.productCategory,
                productDescription: record.productDescription,
                facings: total
            )
        }
    }

Answer 1

這應該可以解決問題：

/// Groups `paData` by its `pCode`/`aCode` pair and returns one record per
/// pair whose `count` is the sum of the counts in that group.
///
/// Every input element is visited once while grouping and once while summing,
/// so no per-group filtering of the whole array is needed.
///
/// - Parameter paData: The records to aggregate. An empty array yields an
///   empty result.
/// - Returns: One `PAData` per distinct `(pCode, aCode)` pair. The order
///   follows dictionary iteration and should not be relied upon.
func calcSum2(_ paData: [PAData]) -> [PAData] {
    // The grouping key is a `PAData` with a placeholder `count`: the type's
    // `==` and `hash(into:)` only look at `pCode` and `aCode`, so the
    // placeholder never influences which bucket a record lands in.
    let grouped = Dictionary(grouping: paData) { record in
        PAData(pCode: record.pCode, aCode: record.aCode, count: -2)
    }

    // Every bucket produces exactly one summary, so a plain `map` is enough;
    // there is nothing optional to compact away.
    return grouped.map { (key, values) -> PAData in
        let sum = values.reduce(into: 0) { $0 += $1.count }
        return PAData(pCode: key.pCode, aCode: key.aCode, count: sum)
    }
}

你的代碼有什么問題：

// One representative record per pCode/aCode pair.
let unique = Set<PAData>(paData)
for key in unique {
            
    // Three chained passes for every unique key: filter scans all of paData,
    // map scans the matches, and reduce scans the mapped counts.
    let sum = paData.filter({$0.pCode == key.pCode && $0.aCode == key.aCode}).map({$0.count}).reduce(0, +)

    let summary = PAData(pCode: key.pCode, aCode: key.aCode, count: sum)

    resultArray.append(summary)
}

可以改成：

// Same computation as the chained form, with each pass given its own name.
let unique = Set<PAData>(paData)
for key in unique {
            
    // Pass 1: visits every element of paData and keeps the matching ones.
    let filtered = paData.filter({$0.pCode == key.pCode && $0.aCode == key.aCode})
    // Pass 2: visits every element of filtered to extract its count.
    let countMap = filtered.map({$0.count})
    // Pass 3: visits every element of countMap to add the counts up.
    let sum = countMap.reduce(0, +)

    let summary = PAData(pCode: key.pCode, aCode: key.aCode, count: sum)

    resultArray.append(summary)
}

舉個例子：假設您有 10k 個元素，並且每組具有相同 pCode 和 aCode 的元素各有 1k 個。

filter：10k 次迭代（您遍歷整個 paData ）
map（countMap）：1k 次迭代（您遍歷整個 filtered ）
reduce：1k 次迭代（您遍歷整個 countMap ）

-> 對每一個唯一的 pCode/aCode 組合，都要把上述過程重複一遍

你看到被浪費掉的計算了嗎？

但是，如果您按照我建議的解決方案進行分組，分組本身已經把記錄歸好了類，因此您不再需要 filter。 我在同一次迭代中同時完成映射和求和；在您的代碼中可以寫成： let sum = filtered.reduce(into: 0, { $0 += $1.count }) 。 在我上面給出的計數示例中，這樣每個唯一值可以節省 1k 次迭代。

現在，我遇到的一個問題是：在我的示例代碼中， unique 並沒有讓每個 aCode 和 pCode 的組合只出現一次，它似乎沒有按應有的方式工作。 我還不知道原因（弄清楚之後我可能會就此補充說明），但無論如何，這種寫法仍然不夠優化，因為您每次仍然要過濾一遍。 理論上，您應該可以在 filter 中直接使用 $0 == key ，因為您重寫了相等方法，且其中不考慮 count 。

我這邊的樣本測試：

// Two records share the ("a", "b") pair; the third stands alone.
let testArray: [PAData] = [
    PAData(pCode: "a", aCode: "b", count: 2),
    PAData(pCode: "a", aCode: "b", count: 3),
    PAData(pCode: "d", aCode: "c", count: 2),
]
// Uniquing through Set relies on PAData's Hashable conformance.
let setTest: Set<PAData> = Set(testArray)

print("setTest: \(setTest)")
// Group under a key that copies the two codes and uses -2 as a placeholder count.
let groupTest = Dictionary(grouping: testArray, by: { record in
    PAData(pCode: record.pCode, aCode: record.aCode, count: -2)
})
print("groupTest: \(groupTest)")

結果很奇怪：

Dictionary:
{
    "PAData(pCode: \"a\", aCode: \"b\", count: -2)" =     (
        "PAData(pCode: \"a\", aCode: \"b\", count: 2)",
        "PAData(pCode: \"a\", aCode: \"b\", count: 3)"
    );
    "PAData(pCode: \"d\", aCode: \"c\", count: -2)" =     (
        "PAData(pCode: \"d\", aCode: \"c\", count: 2)"
    );
}
Set:
{(
    PAData(pCode: "a", aCode: "b", count: 3),
    PAData(pCode: "d", aCode: "c", count: 2),
    PAData(pCode: "a", aCode: "b", count: 2)
)}

如何加快按兩個屬性分組的結構數組的總和 - macOS Swift

問題描述

1 個解決方案

解決方案1
0 已采納 2021-05-21 12:31:06

如何加快按兩個屬性分組的結構數組的總和 - macOS Swift

問題描述

1 個解決方案

解決方案1 0 已采納 2021-05-21 12:31:06

解決方案1
0 已采納 2021-05-21 12:31:06