減少 function 的運行時間

Question

對於一個學校項目，我必須編寫一個 Wordle 游戲。 現在，我已經差不多完成了，但是我還需要完成一項要求。 某個 function 需要在 1 秒內完成，但是我的需要將近 6 秒才能完成。 function 稱為'smart_guess'，它需要兩個arguments：'wordlist' 和'targets'。 “Wordlist”是您可以猜測的單詞列表。 “目標”是一個可能的目標詞列表，基於之前的猜測。 當還沒有猜測時，targets 將等於 wordlist。 返回值是wordlist中出現的一個詞（即字符串）。 這應該是一個聰明的猜測，有助於在很少的回合內找到實際目標。

def smart_guess(wordlist, targets):
    ''' Returns best guess after comparing the distributions of each sampled guess '''
    
    #Get randomized sample from targetlist 
    samples = sample_targets(targets)
    #Get big number to start with
    min_largest_value = len(wordlist)
    best_guess = ""
    #Iterate trough samples
    for guess in samples:
        #Find the biggest number in distribution
        biggest_value_in_distr = max(distributions(guess, targets).values())
        #Check if biggest number is the smallest of all, if so, add the guess to best_guess
        if biggest_value_in_distr < min_largest_value:
            min_largest_value = biggest_value_in_distr
            best_guess = guess
            if min_largest_value <= 2:
                return best_guess  
    return best_guess
 
def sample_targets(targets):
    #Get randomized sample from targetlist and add a total random word
    len_word = len(targets[0])
    decr = 10
    if len_word == 4:
        sample_size = 100
    decr -= 1
    if len_word == 5:
        sample_size = 100
    decr -= 1
    if len_word == 6:
        sample_size = len_word * decr 
    decr -= 1
    if len_word == 7:
        sample_size = 60
    decr -= 1
    if len_word == 8:
        sample_size = len_word * decr
    decr -= 1
    if len_word == 9:
        sample_size = 8
    decr -= 1
    if len_word == 10:
        sample_size = 5
    
    samples = set([i for i in targets[0:sample_size]])
    
    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    return samples

這是我想讓運行速度更快的 function。 為了更清楚，我將在此處添加我的整個程序：

import random

def load_words(file):
  result = set()
  with open(file) as f:
    for line in f.readlines():
       word = line.strip().lower()
       if word.isalpha() and word.isascii():
         result.add(word)
  return sorted(result)

def compare(guess, target):
    ''' Compare two words and give string with 'X' letter is in good place, 'O' not in good place but in word and '-': not in the word. '''
    result = list(target)
    index_list = list(range(len(guess)))
    letter_dict = {}
    for letter in target:
        letter_dict[letter] = target.count(letter)
        
    # Iterate list of indexes
    for idx in range(len(index_list)):
        # Look which letters are in good place
        if guess[idx] == target[idx]:
            # Decrease letter count
            letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
            # Delete index from list add 'X'
            result[idx] = "X"
            index_list.remove(idx)
            
    for idx in index_list:
        #Check if letter still is in letter_dict and in target
        if guess[idx] in target and letter_dict[guess[idx]] > 0:
            # Remove lettercount from dict
            letter_dict[guess[idx]] = letter_dict[guess[idx]] - 1
            # Add 'O' to place in guess_list 
            result[idx] = "O"
        else:
            result[idx] = "-"
            
    return  "".join(result)

dutch_words = load_words("wordlist.txt")
d6 = [word for word in dutch_words if len(word) == 6]
d6q = [word for word in d6 if word.startswith("q")]


def filter_targets(targets, guess_results):
   final_targets = []  
   for target in targets:
        #Create list with compared results
       temp_list = []
       for guess in guess_results:
          temp_list.append(compare(guess, target))   
        #Compare results are the same, add to final_targets  
       if temp_list == list(guess_results.values()):
          final_targets.append(target)
   return final_targets
    
def distributions(guess, targets):
    distr_dict = {}
    #Check how many times compared gives result
    for target in targets:
        result = compare(guess, target)
        if result not in list(distr_dict.keys()):
            distr_dict[result] = 1
        else:
            distr_dict[result] += 1 
    return distr_dict

def smart_guess(wordlist, targets):
    ''' Returns best guess after comparing the distributions of each sampled guess '''
    
    #Get randomized sample from targetlist 
    samples = sample_targets(targets)
    #Get big number to start with
    min_largest_value = len(wordlist)
    best_guess = ""
    #Iterate trough samples
    for guess in samples:
        #Find the biggest number in distribution
        biggest_value_in_distr = max(distributions(guess, targets).values())
        #Check if biggest number is the smallest of all, if so, add the guess to best_guess
        if biggest_value_in_distr < min_largest_value:
            min_largest_value = biggest_value_in_distr
            best_guess = guess
            if min_largest_value <= 2:
                return best_guess  
    return best_guess
 
def sample_targets(targets):
    #Get randomized sample from targetlist and add a total random word
    len_word = len(targets[0])
    decr = 10
    if len_word == 4:
        sample_size = 100
    decr -= 1
    if len_word == 5:
        sample_size = 100
    decr -= 1
    if len_word == 6:
        sample_size = len_word * decr 
    decr -= 1
    if len_word == 7:
        sample_size = 60
    decr -= 1
    if len_word == 8:
        sample_size = len_word * decr
    decr -= 1
    if len_word == 9:
        sample_size = 8
    decr -= 1
    if len_word == 10:
        sample_size = 5
    
    samples = set([i for i in targets[0:sample_size]])

    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    samples.add(random.choice(targets))
    return samples


def simulate_game(target, wordlist):
    n = len(target)
    wordlist = [w for w in wordlist if len(w) == n and w[0] == target[0]]
    if target not in wordlist:
      raise ValueError("Target is not in wordlist, thus impossible to guess.")
    targets = wordlist.copy()
    turns = 0
    while True:
       num_words = len(targets)
       print(f"There {'is' if num_words==1 else 'are'} {num_words} possible"
                f" target{'s' if num_words!=1 else ''} left.")
       turns += 1
       guess = smart_guess(wordlist, targets)
       print("My guess is: ", guess.upper())
       result = compare(guess, target)
       print("Correctness: ", result)
       if result == n * "X":
          print(f"Target was guessed in {turns} "
                 f"turn{'s' if turns!=1 else ''}.")
          break
       else:
          targets = filter_targets(targets, {guess: result})

def count_turns(target, wordlist):
    n = len(target)
    wordlist = [w for w in wordlist if len(w) == n and w[0]==target[0]]
    targets = wordlist.copy()
    turns = 0
    while True:
      turns += 1
      if turns > 100:
        raise RuntimeError("This is going nowhere: 100 turns used.")
      guess = smart_guess(wordlist, targets)
      result = compare(guess, target)
      if result == n * "X":
        break
      else:
          targets = filter_targets(targets, {guess: result})
    return turns

def turn_count_simulation(word_length, wordlist, runs=100):
    wordlist = [word for word in wordlist if len(word) == word_length]
    total = 0
    for _ in range(runs):
       target = random.choice(wordlist)
       total += count_turns(target, wordlist)
    return total/runs

Answer 1

減少運行時間有很多步驟。

第一步：使vars成為一個襯墊。

可以說我有x=1和y=2 。 而不是將它們寫在 2 行上，而是x,y=1,2 。

第 2 步：刪除語句中的 bool 值。 假設我有一個 while 語句， while x == True: 。 我會用while x:替換它，如果 x 是 false 那么我會做， while not x: 。

第三步：做一個襯里。 Python 線路多的時候比較慢。 我們可以使用一個襯墊去除大約 40% 的線條。 而不是做x+=1 ， y+=1之類的事情。 我們可以做x+=1; y+=1 x+=1; y+=1 。

結論：python中代碼優化的步驟很多，這只是其中的三個。 我建議您研究這三個步驟，以大致了解如何實施它們。

Answer 2

您需要的是分析您的代碼以了解哪里花費的時間太長。 因此，您不會浪費時間嘗試優化不會真正影響總時序的部分。

讓我們嘗試只生成一些“單詞”（1716：字母表前 13 個字母中的 6 個的組合）和配置文件smart_guess ：

import cProfile
from itertools import combinations
ws=[''.join(c) for c in combinations('ABCDEFGHIJKLM',6)]

cProfile.run('smart_guess(ws,ws)')
         1053127 function calls in 0.749 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.749    0.749 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 random.py:224(_randbelow)
        3    0.000    0.000    0.000    0.000 random.py:256(choice)
        1    0.000    0.000    0.000    0.000 wordle_long.py:115(<listcomp>)
    87516    0.372    0.000    0.500    0.000 wordle_long.py:12(compare)
       51    0.242    0.005    0.749    0.015 wordle_long.py:59(distributions)
        1    0.000    0.000    0.749    0.749 wordle_long.py:70(smart_guess)
        1    0.000    0.000    0.000    0.000 wordle_long.py:90(sample_targets)
        1    0.000    0.000    0.749    0.749 {built-in method builtins.exec}
   175037    0.012    0.000    0.012    0.000 {built-in method builtins.len}
       51    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        3    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        3    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
   525096    0.089    0.000    0.089    0.000 {method 'count' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
    87516    0.014    0.000    0.014    0.000 {method 'join' of 'str' objects}
    87516    0.008    0.000    0.008    0.000 {method 'keys' of 'dict' objects}
    90272    0.012    0.000    0.012    0.000 {method 'remove' of 'list' objects}
       51    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}

所以罪魁禍首是compare 。 嘗試優化它。 例如：

def compare(guess, target):
    ltarget = list(target)
    result = ['-' for _ in guess]
    ##find right char, right place
    for i,c in enumerate(guess):
        if c == ltarget[i]:
            result[i] = 'X'
            ltarget[i] = ''
    ##find right char, wrong place
    for i,c in enumerate(guess):
        if c in ltarget:
            result[i] = 'O'
            ltarget[ltarget.index(c)] = ''
    return ''.join(result)

現在

>>> cProfile.run('smart_guess(ws,ws)')
         502586 function calls in 0.503 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.503    0.503 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 random.py:224(_randbelow)
        3    0.000    0.000    0.000    0.000 random.py:256(choice)
        1    0.000    0.000    0.000    0.000 wordle_long.py:105(sample_targets)
    87516    0.232    0.000    0.288    0.000 wordle_long.py:12(compare)
        1    0.000    0.000    0.000    0.000 wordle_long.py:130(<listcomp>)
    87516    0.022    0.000    0.022    0.000 wordle_long.py:14(<listcomp>)
       51    0.208    0.004    0.503    0.010 wordle_long.py:74(distributions)
        1    0.000    0.000    0.503    0.503 wordle_long.py:85(smart_guess)
        1    0.000    0.000    0.503    0.503 {built-in method builtins.exec}
        5    0.000    0.000    0.000    0.000 {built-in method builtins.len}
       51    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        3    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        3    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
   152343    0.022    0.000    0.022    0.000 {method 'index' of 'list' objects}
    87516    0.013    0.000    0.013    0.000 {method 'join' of 'str' objects}
    87516    0.007    0.000    0.007    0.000 {method 'keys' of 'dict' objects}
       51    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}

好一些。 現在， distributions是最慢的部分，所以我們應該優化它。 依此類推，直到達到可接受的執行時間。

減少 function 的運行時間

問題描述

2 個解決方案

解決方案1
0 2022-04-19 15:50:52

解決方案2
0 2022-04-19 16:09:08

減少 function 的運行時間

問題描述

2 個解決方案

解決方案1 0 2022-04-19 15:50:52

解決方案2 0 2022-04-19 16:09:08

解決方案1
0 2022-04-19 15:50:52

解決方案2
0 2022-04-19 16:09:08