Python 在多個嵌套循環中集成多處理

Question

由於我缺乏編碼技能，我正在制作一個非常低效的拼字游戲單詞生成器。 在這個程序中，用戶輸入一系列字母，程序使用蠻力找到每個有效的拼字游戲單詞。 為了加快這個過程，我想實現多處理，但無法讓它成功工作。 可以正常運行的非多處理代碼如下：

from multiprocessing import Process
# Read the player's tiles, upper-case them and keep them as a sorted list of
# single characters.
usrList = sorted(input("type the letters you have     ").upper())
print(usrList)


# Words found so far; the word-finding functions append their matches here.
storedList = []

def word2(usrList):
    """Print and record every two-letter dictionary word buildable from usrList.

    Each position of usrList is used at most once per candidate. A candidate
    counts as a word when the candidate followed by a newline is an entry of
    the module-level dicList. Matches are printed and appended to the
    module-level storedList unless they are already stored there.

    Args:
        usrList: sequence of single-letter tiles.
    """
    # Function-scope import: the file does not import itertools at the top.
    from itertools import permutations

    print('trying to find two letter words')
    # permutations() yields position-distinct arrangements in the same order
    # as a pair of nested index loops that skip equal indices.
    for letters in permutations(usrList, 2):
        candidate = ''.join(map(str, letters))
        if candidate not in storedList and candidate + '\n' in dicList:
            print(candidate)
            storedList.append(candidate)

def word3(usrList):
    """Print and record every three-letter dictionary word buildable from usrList.

    Each position of usrList is used at most once per candidate. A candidate
    counts as a word when the candidate followed by a newline is an entry of
    the module-level dicList. Matches are printed and appended to the
    module-level storedList unless they are already stored there.

    Args:
        usrList: sequence of single-letter tiles; fewer than three tiles
            produces no candidates.
    """
    # Function-scope import: the file does not import itertools at the top.
    from itertools import permutations

    print('trying to find three leter words')
    # permutations() yields nothing when usrList is shorter than 3, and
    # otherwise walks position-distinct arrangements in nested-loop order.
    for letters in permutations(usrList, 3):
        candidate = ''.join(map(str, letters))
        if candidate not in storedList and candidate + '\n' in dicList:
            print(candidate)
            storedList.append(candidate)

def word4(usrList):
    """Print and record every four-letter dictionary word buildable from usrList.

    Each position of usrList is used at most once per candidate. A candidate
    counts as a word when the candidate followed by a newline is an entry of
    the module-level dicList. Matches are printed and appended to the
    module-level storedList unless they are already stored there.

    Args:
        usrList: sequence of single-letter tiles; fewer than four tiles
            produces no candidates.
    """
    # Function-scope import: the file does not import itertools at the top.
    from itertools import permutations

    print('trying to find four letter words')
    # permutations() yields nothing when usrList is shorter than 4, and
    # otherwise walks position-distinct arrangements in nested-loop order.
    for letters in permutations(usrList, 4):
        candidate = ''.join(map(str, letters))
        if candidate not in storedList and candidate + '\n' in dicList:
            print(candidate)
            storedList.append(candidate)


def word5(usrList):
    """Print and record every five-letter dictionary word buildable from usrList.

    Each position of usrList is used at most once per candidate. A candidate
    counts as a word when the candidate followed by a newline is an entry of
    the module-level dicList. Matches are printed and appended to the
    module-level storedList unless they are already stored there.

    Args:
        usrList: sequence of single-letter tiles; fewer than five tiles
            produces no candidates.
    """
    # Function-scope import: the file does not import itertools at the top.
    from itertools import permutations

    print('trying to find five letter words')
    # permutations() yields nothing when usrList is shorter than 5, and
    # otherwise walks position-distinct arrangements in nested-loop order.
    for letters in permutations(usrList, 5):
        candidate = ''.join(map(str, letters))
        if candidate not in storedList and candidate + '\n' in dicList:
            print(candidate)
            storedList.append(candidate)


def word6(usrList):
    """Print and record every six-letter dictionary word buildable from usrList.

    Each position of usrList is used at most once per candidate. A candidate
    counts as a word when the candidate followed by a newline is an entry of
    the module-level dicList. Matches are printed and appended to the
    module-level storedList unless they are already stored there.

    Args:
        usrList: sequence of single-letter tiles; fewer than six tiles
            produces no candidates.
    """
    # Function-scope import: the file does not import itertools at the top.
    from itertools import permutations

    print('trying to find six letter words')
    # permutations() yields nothing when usrList is shorter than 6, and
    # otherwise walks position-distinct arrangements in nested-loop order.
    for letters in permutations(usrList, 6):
        candidate = ''.join(map(str, letters))
        if candidate not in storedList and candidate + '\n' in dicList:
            print(candidate)
            storedList.append(candidate)

def word7(usrList):
    """Print and record every seven-letter dictionary word buildable from usrList.

    Each position of usrList is used at most once per candidate. A candidate
    counts as a word when the candidate followed by a newline is an entry of
    the module-level dicList. Matches are printed and appended to the
    module-level storedList unless they are already stored there.

    Args:
        usrList: sequence of single-letter tiles; fewer than seven tiles
            produces no candidates.
    """
    # Function-scope import: the file does not import itertools at the top.
    from itertools import permutations

    print('trying to find seven letter words')
    # permutations() guarantees all seven positions are distinct. The
    # hand-written distinctness test it replaces compared i and l against the
    # literal 0 instead of the seventh index o, which skipped every word whose
    # first or fourth letter came from tile 0 and let the seventh letter reuse
    # the tile already taken by the first or fourth letter.
    for letters in permutations(usrList, 7):
        candidate = ''.join(map(str, letters))
        if candidate not in storedList and candidate + '\n' in dicList:
            print(candidate)
            storedList.append(candidate)



# Load the word list once. Lines are stored exactly as read, including any
# trailing newline character.
f = 'ScrabbleDic.txt'
with open(f, 'r') as file:
    # The with-block closes the file on exit, so no explicit close() is needed.
    dicList = list(file)

if __name__ == '__main__':
    # Run the searches from the longest word length down to the shortest.
    word7(usrList)
    word6(usrList)
    word5(usrList)
    word4(usrList)
    word3(usrList)
    word2(usrList)

Answer 1

一般來說，你從重新設計算法中獲得的價值往往比使用多處理獲得的價值更多。

這是您的代碼的較短實現。 我已經對 usrList 進行了硬編碼，並且由於我無權訪問您正在使用的字典文件，因此我使用的是 MacOS 附帶的默認字典文件。 我沒有編寫嵌套循環和檢查重復索引，而是使用 itertools 模塊生成給定長度的 usrList 的所有排列。 這不會顯着加快代碼速度，但可以更輕松地演示可能的更改：

import itertools

usrList = ['P', 'Y', 'T', 'H', 'O', 'N', 'S']  # hard-coded tiles for this example
storedList = []  # possible_words() appends its matches here
with open('/usr/share/dict/words', 'r') as dict_file:
    # Normalise every entry: strip surrounding whitespace and upper-case it
    dicList = [word.strip().upper() for word in dict_file]


def possible_words(length):
    """Append each `length`-letter permutation of usrList that is in dicList to storedList."""
    # permutations() yields tuples, so join each one into a string first
    candidates = map(''.join, itertools.permutations(usrList, length))
    # `in dicList` is a linear search through the list for every candidate
    storedList.extend(word for word in candidates if word in dicList)


for word_length in range(2, 8):  # range() excludes its end value, so this tries lengths 2 through 7
    possible_words(word_length)

在我的 Macbook 上運行大約需要47.4 秒。 為了加快速度，讓我們按照您的建議添加多處理。 有幾種使用多處理的方法，但最容易實現的可能是創建一個池並調用它的map() function。

如果您不習慣使用以其他函數作為參數（arguments）的函數，此語法可能看起來有點奇怪。 實際上，我們正在創建一個工作進程池，然後為該池提供一個函數以及一系列要傳給該函數的參數。 然後，各個函數調用會被分配到池中的進程去執行，而不是按順序逐一調用：

import itertools
import multiprocessing

usrList = ['P', 'Y', 'T', 'H', 'O', 'N', 'S']  # hard-coded tiles for this example
storedList = []  # possible_words() appends its matches here
with open('/usr/share/dict/words', 'r') as dict_file:
    # Normalise every entry: strip surrounding whitespace and upper-case it
    dicList = [word.strip().upper() for word in dict_file]


def possible_words(length):
    """Return the `length`-letter permutations of usrList that appear in dicList.

    Every match is also appended to the module-level storedList. When this
    function runs inside a multiprocessing worker, that list belongs to the
    worker process, so the caller has to use the return value to get the
    results back.

    Args:
        length: number of letters in each candidate word.

    Returns:
        List of matching words, in the order the permutations were generated.
    """
    found = []
    for letter_permutation in itertools.permutations(usrList, length):
        word = ''.join(letter_permutation)  # permutations() yields tuples, not strings
        if word in dicList:  # linear search through the list
            storedList.append(word)
            found.append(word)
    return found


if __name__ == '__main__':  # multiprocessing complains if this isn't isolated
    with multiprocessing.Pool(6) as p:  # Creates 6 worker processes
        # Each task calls possible_words() with a different length; map()
        # returns the per-length result lists in input order.
        words_per_length = p.map(possible_words, range(2, 8))
    # Appends made inside the workers never reach this process, so collect
    # the returned results into storedList here.
    storedList.extend(word for words in words_per_length for word in words)

這在我的 Macbook 上運行需要 32.3 秒。 我們縮短了將近三分之一的時間（47.4 秒 → 32.3 秒），可能還有一些方法可以從這種方法中擠出更多的性能。 但也值得研究一下算法，看看是否有其他方法可以加快速度。

現在，您正在創建一個字典單詞列表。 當您檢查潛在單詞是否在該列表中時，Python 必須掃描整個列表，直到找到匹配項或到達末尾。 我的內置字典有 235K 單詞，所以這意味着它必須對它生成的每個無意義的字母組合進行 235K 字符串比較！

如果從使用列表切換到集合，Python 可以改為使用哈希函數（hash function）在幾乎恆定的時間內查找一個值，而不必花時間逐一掃描每個條目。 讓我們嘗試一下這種做法，而不是使用多處理：

import itertools

usrList = ['P', 'Y', 'T', 'H', 'O', 'N', 'S']  # hard-coded tiles for this example
storedList = []  # possible_words() appends its matches here
with open('/usr/share/dict/words', 'r') as dict_file:
    # Normalise every entry: strip surrounding whitespace and upper-case it
    dicSet = {word.strip().upper() for word in dict_file}   # By changing [] to {}, this is now a set


def possible_words(length):
    """Append each `length`-letter permutation of usrList that is in dicSet to storedList."""
    # permutations() yields tuples, so join each one into a string first
    candidates = map(''.join, itertools.permutations(usrList, length))
    # Membership in the set is a single hashed check, not a scan of ~235,000 entries
    storedList.extend(word for word in candidates if word in dicSet)


# range() excludes its end value, so this tries word lengths 2 through 7
for word_length in range(2, 8):
    possible_words(word_length)

這個版本在0.005 秒內運行，只需更改兩個字符！

總之，多處理是一個有用的工具，但它可能不應該是您嘗試的第一件事。 通過思考您正在使用的數據結構和算法以及瓶頸可能在哪里，您通常會獲得更好的結果。

Answer 2

解決此類難題的經典解決方案不是檢查每個可能的排列，而是將樣本字母和字典中的單詞轉換為一致的可搜索排列 - 通過對它們的字符進行排序！

現在，您無需在字典中搜索“PYTHONS”的每個排列，只需對字母進行排序以創建鍵“HNOPSTY”，所有具有相同鍵的有效單詞都將在 map 中找到。

使用 defaultdict，很容易創建字典中所有單詞的查找 map。 我們使用defaultdict(list)而不是 dict 因為多個單詞可能排序到同一個鍵。

from collections import defaultdict
# Maps a sorted-letter key to the list of words made of exactly those letters
dictionary_mapping = defaultdict(list)

# assuming dictionary is a list of all valid words, regardless of length
for word in dictionary:
    key = ''.join(sorted(word.upper()))  # anagrams of one another produce the same key
    dictionary_mapping[key].append(word)

search_word = "PYTHONS"
search_key = ''.join(sorted(search_word.upper()))

# get all words that are anagrams of the search word, or the empty list if none
# (.get() is used so a missing key is not inserted into the defaultdict)
print(dictionary_mapping.get(search_key, []))

Python 在多個嵌套循環中集成多處理

問題描述

2 個解決方案

解決方案1
4 2020-04-29 06:08:35

解決方案2
0 2020-04-29 17:22:04

Python 在多個嵌套循環中集成多處理

問題描述

2 個解決方案

解決方案1 4 2020-04-29 06:08:35

解決方案2 0 2020-04-29 17:22:04

解決方案1
4 2020-04-29 06:08:35

解決方案2
0 2020-04-29 17:22:04