减少 function 的运行时间

Question

对于一个学校项目，我必须编写一个 Wordle 游戏。现在我已经差不多完成了，但还有一项要求没有满足：某个 function 需要在 1 秒内运行完毕，而我的实现需要将近 6 秒。这个 function 叫做 'smart_guess'，它接受两个参数：'wordlist' 和 'targets'。'wordlist' 是可以用来猜测的单词列表。'targets' 是根据之前的猜测结果筛选出的、仍然可能是答案的目标词列表。在还没有进行任何猜测时，targets 等于 wordlist。返回值是 wordlist 中的一个词（即字符串）。它应该是一个聪明的猜测，有助于用尽量少的回合找到真正的目标词。

def smart_guess(wordlist, targets):
    """Return the sampled guess whose largest feedback bucket is smallest.

    Candidate guesses come from ``sample_targets(targets)``.  For each one,
    ``distributions(guess, targets)`` counts how many targets produce each
    feedback pattern; the candidate whose most common pattern is rarest is
    kept (a minimax choice).

    Args:
        wordlist: Words that may be guessed.  The search only samples from
            ``targets``; the parameter is kept so existing callers work.
        targets: Non-empty sequence of words that can still be the target.

    Returns:
        One of the sampled words.
    """
    # Start above any reachable bucket size so the first sample is always
    # accepted.  Seeding with len(wordlist) and comparing with '<' returned
    # "" whenever the largest bucket equalled that value (one-word lists).
    min_largest_value = len(targets) + 1
    best_guess = ""
    for guess in sample_targets(targets):
        # Worst case for this guess: the size of its most common pattern.
        biggest_value_in_distr = max(distributions(guess, targets).values())
        if biggest_value_in_distr < min_largest_value:
            min_largest_value = biggest_value_in_distr
            best_guess = guess
            # At most two targets can remain after this guess: good enough,
            # so stop searching early.
            if min_largest_value <= 2:
                return best_guess
    return best_guess
 
def sample_targets(targets):
    """Choose the candidate guesses that will be evaluated.

    The sample is a prefix of ``targets`` whose size depends on the word
    length, plus up to three randomly chosen targets.

    Args:
        targets: Non-empty sequence of equally long words.

    Returns:
        A set of words taken from ``targets``.

    Raises:
        IndexError: If ``targets`` is empty.
    """
    # Prefix size per word length.  The entries for 6 and 8 are the values
    # the former running-counter arithmetic produced (6 * 8 and 8 * 6).
    sizes = {4: 100, 5: 100, 6: 48, 7: 60, 8: 48, 9: 8, 10: 5}
    len_word = len(targets[0])
    if len_word in sizes:
        sample_size = sizes[len_word]
    elif len_word < 4:
        # Lengths outside 4..10 used to leave sample_size unbound and
        # crash; short words are cheap to compare, so use the largest size.
        sample_size = 100
    else:
        # Long words are expensive to compare, so use the smallest size.
        sample_size = 5

    samples = set(targets[:sample_size])
    # A few random picks so the sample is not limited to the list prefix.
    for _ in range(3):
        samples.add(random.choice(targets))
    return samples

这是我想让运行速度更快的 function。 为了更清楚，我将在此处添加我的整个程序：

import random

def load_words(file):
    """Read a word list from ``file`` and return its usable words, sorted.

    Each line is stripped of surrounding whitespace and lower-cased.  Only
    entries made up entirely of ASCII letters are kept; duplicates collapse
    into one entry.

    Args:
        file: Path of a text file with one word per line.

    Returns:
        A sorted list of unique lower-case words.
    """
    with open(file) as handle:
        normalised = (raw_line.strip().lower() for raw_line in handle)
        unique_words = {
            entry for entry in normalised
            if entry.isalpha() and entry.isascii()
        }
    return sorted(unique_words)

def compare(guess, target):
    """Score ``guess`` against ``target`` and return the feedback string.

    For each position of the guess the result holds 'X' when the letter is
    in the right place, 'O' when the letter occurs in the target at another
    position that is not yet accounted for, and '-' otherwise.  Both words
    are expected to have the same length.

    Args:
        guess: The guessed word.
        target: The word to be found.

    Returns:
        The feedback pattern as a string.
    """
    marks = list(target)

    # How often each target letter may still be matched.
    remaining = {}
    for letter in target:
        remaining[letter] = remaining.get(letter, 0) + 1

    # First pass: exact matches use up their letter; every other position
    # is remembered for the second pass.
    unresolved = []
    for pos in range(len(guess)):
        letter = guess[pos]
        if letter == target[pos]:
            remaining[letter] -= 1
            marks[pos] = "X"
        else:
            unresolved.append(pos)

    # Second pass: a misplaced letter only counts while the target still
    # has an unmatched copy of it.
    for pos in unresolved:
        letter = guess[pos]
        if remaining.get(letter, 0) > 0:
            remaining[letter] -= 1
            marks[pos] = "O"
        else:
            marks[pos] = "-"

    return "".join(marks)

# Word list read from "wordlist.txt" when the module is loaded.
dutch_words = load_words("wordlist.txt")
# Six-letter words, and the six-letter words that begin with "q".
d6 = [candidate for candidate in dutch_words if len(candidate) == 6]
d6q = [candidate for candidate in d6 if candidate[0] == "q"]


def filter_targets(targets, guess_results):
    """Keep the targets that agree with every recorded guess result.

    Args:
        targets: Candidate target words.
        guess_results: Dict mapping each guess to the feedback string it
            received.

    Returns:
        A list, in the original order, of the targets for which
        ``compare(guess, target)`` reproduces the recorded feedback for
        every guess.
    """
    return [
        candidate
        for candidate in targets
        if [compare(guess, candidate) for guess in guess_results]
        == list(guess_results.values())
    ]
    
def distributions(guess, targets):
    """Count how many targets yield each feedback pattern for ``guess``.

    Args:
        guess: The word to evaluate.
        targets: Words that can still be the target.

    Returns:
        A dict mapping each feedback string returned by ``compare`` to the
        number of targets that produce it.
    """
    distr_dict = {}
    for target in targets:
        result = compare(guess, target)
        # Update the count with one dict lookup; the former membership test
        # built a list of all keys and scanned it for every target.
        distr_dict[result] = distr_dict.get(result, 0) + 1
    return distr_dict

def smart_guess(wordlist, targets):
    """Return the sampled guess whose largest feedback bucket is smallest.

    Candidate guesses come from ``sample_targets(targets)``.  For each one,
    ``distributions(guess, targets)`` counts how many targets produce each
    feedback pattern; the candidate whose most common pattern is rarest is
    kept (a minimax choice).

    Args:
        wordlist: Words that may be guessed.  The search only samples from
            ``targets``; the parameter is kept so existing callers work.
        targets: Non-empty sequence of words that can still be the target.

    Returns:
        One of the sampled words.
    """
    # Start above any reachable bucket size so the first sample is always
    # accepted.  Seeding with len(wordlist) and comparing with '<' returned
    # "" whenever the largest bucket equalled that value (one-word lists).
    min_largest_value = len(targets) + 1
    best_guess = ""
    for guess in sample_targets(targets):
        # Worst case for this guess: the size of its most common pattern.
        biggest_value_in_distr = max(distributions(guess, targets).values())
        if biggest_value_in_distr < min_largest_value:
            min_largest_value = biggest_value_in_distr
            best_guess = guess
            # At most two targets can remain after this guess: good enough,
            # so stop searching early.
            if min_largest_value <= 2:
                return best_guess
    return best_guess
 
def sample_targets(targets):
    """Choose the candidate guesses that will be evaluated.

    The sample is a prefix of ``targets`` whose size depends on the word
    length, plus up to three randomly chosen targets.

    Args:
        targets: Non-empty sequence of equally long words.

    Returns:
        A set of words taken from ``targets``.

    Raises:
        IndexError: If ``targets`` is empty.
    """
    # Prefix size per word length.  The entries for 6 and 8 are the values
    # the former running-counter arithmetic produced (6 * 8 and 8 * 6).
    sizes = {4: 100, 5: 100, 6: 48, 7: 60, 8: 48, 9: 8, 10: 5}
    len_word = len(targets[0])
    if len_word in sizes:
        sample_size = sizes[len_word]
    elif len_word < 4:
        # Lengths outside 4..10 used to leave sample_size unbound and
        # crash; short words are cheap to compare, so use the largest size.
        sample_size = 100
    else:
        # Long words are expensive to compare, so use the smallest size.
        sample_size = 5

    samples = set(targets[:sample_size])
    # A few random picks so the sample is not limited to the list prefix.
    for _ in range(3):
        samples.add(random.choice(targets))
    return samples


def simulate_game(target, wordlist):
    """Play one game for ``target`` and print a transcript of every turn.

    The word list is first narrowed to the words that share the target's
    length and first letter.  That narrowed list is both the set of allowed
    guesses and the initial set of possible targets.

    Args:
        target: The word to be found.
        wordlist: All known words.

    Raises:
        ValueError: If ``target`` is not in the narrowed word list.
    """
    n = len(target)
    allowed = [w for w in wordlist if len(w) == n and w[0] == target[0]]
    if target not in allowed:
        raise ValueError("Target is not in wordlist, thus impossible to guess.")

    solved = n * "X"
    targets = allowed.copy()
    turns = 0
    while True:
        num_words = len(targets)
        print(f"There {'is' if num_words==1 else 'are'} {num_words} possible"
              f" target{'s' if num_words!=1 else ''} left.")
        turns += 1
        guess = smart_guess(allowed, targets)
        print("My guess is: ", guess.upper())
        feedback = compare(guess, target)
        print("Correctness: ", feedback)
        if feedback != solved:
            # Not solved yet: keep only the targets that match the feedback.
            targets = filter_targets(targets, {guess: feedback})
            continue
        print(f"Target was guessed in {turns} "
              f"turn{'s' if turns!=1 else ''}.")
        return

def count_turns(target, wordlist):
    """Return the number of turns needed to guess ``target``.

    The word list is narrowed to words with the target's length and first
    letter before play starts.

    Args:
        target: The word to be found.
        wordlist: All known words.

    Returns:
        The 1-based turn on which the guess matched the target.

    Raises:
        RuntimeError: If the target was not found within 100 turns.
    """
    n = len(target)
    allowed = [w for w in wordlist if len(w) == n and w[0] == target[0]]
    solved = n * "X"
    targets = allowed.copy()
    for turns in range(1, 101):
        guess = smart_guess(allowed, targets)
        feedback = compare(guess, target)
        if feedback == solved:
            return turns
        targets = filter_targets(targets, {guess: feedback})
    raise RuntimeError("This is going nowhere: 100 turns used.")

def turn_count_simulation(word_length, wordlist, runs=100):
    """Return the average number of turns over ``runs`` random games.

    Args:
        word_length: Only words of this length take part.
        wordlist: All known words.
        runs: Number of games to play; each uses a randomly chosen target.

    Returns:
        The mean of ``count_turns`` over all games.
    """
    pool = [entry for entry in wordlist if len(entry) == word_length]
    total = sum(
        count_turns(random.choice(pool), pool) for _ in range(runs)
    )
    return total / runs

Answer 1

减少运行时间有很多步骤。

第一步：把变量赋值写成单行（one-liner）。

假设我有 x=1 和 y=2。不要把它们分别写在两行上，而是写成 x,y=1,2。

第二步：去掉条件语句中与 bool 值的比较。假设我有一个 while 语句 while x == True:，我会把它替换为 while x:；如果要判断 x 为 False，则写成 while not x:。

第三步：使用单行写法（one-liner）。Python 在代码行数多的时候比较慢。我们可以用单行写法去掉大约 40% 的代码行。与其把 x+=1 和 y+=1 分成两行来写，不如写成 x+=1; y+=1。

结论：python中代码优化的步骤很多，这只是其中的三个。 我建议您研究这三个步骤，以大致了解如何实施它们。

Answer 2

您需要做的是对代码进行性能分析（profiling），找出哪里花费的时间太长。这样，您就不会浪费时间去优化那些对总耗时没有实际影响的部分。

让我们先生成一些“单词”（共 1716 个：从字母表前 13 个字母中任取 6 个的组合），然后对 smart_guess 进行性能分析（profile）：

import cProfile
from itertools import combinations
# Synthetic word list: every 6-letter combination of the first 13 letters.
ws = list(map(''.join, combinations('ABCDEFGHIJKLM', 6)))

# Profile one smart_guess call with the full list as both arguments.
cProfile.run('smart_guess(ws,ws)')
         1053127 function calls in 0.749 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.749    0.749 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 random.py:224(_randbelow)
        3    0.000    0.000    0.000    0.000 random.py:256(choice)
        1    0.000    0.000    0.000    0.000 wordle_long.py:115(<listcomp>)
    87516    0.372    0.000    0.500    0.000 wordle_long.py:12(compare)
       51    0.242    0.005    0.749    0.015 wordle_long.py:59(distributions)
        1    0.000    0.000    0.749    0.749 wordle_long.py:70(smart_guess)
        1    0.000    0.000    0.000    0.000 wordle_long.py:90(sample_targets)
        1    0.000    0.000    0.749    0.749 {built-in method builtins.exec}
   175037    0.012    0.000    0.012    0.000 {built-in method builtins.len}
       51    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        3    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        3    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
   525096    0.089    0.000    0.089    0.000 {method 'count' of 'str' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
    87516    0.014    0.000    0.014    0.000 {method 'join' of 'str' objects}
    87516    0.008    0.000    0.008    0.000 {method 'keys' of 'dict' objects}
    90272    0.012    0.000    0.012    0.000 {method 'remove' of 'list' objects}
       51    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}

所以罪魁祸首是compare 。 尝试优化它。 例如：

def compare(guess, target):
    """Score ``guess`` against ``target`` and return the feedback string.

    Each position of the result is 'X' (right letter, right place), 'O'
    (letter occurs elsewhere in the target and is not yet accounted for)
    or '-' (no match).  Both words are expected to have the same length.

    Args:
        guess: The guessed word.
        target: The word to be found.

    Returns:
        The feedback pattern, one character per letter of ``guess``.
    """
    ltarget = list(target)
    result = ['-'] * len(guess)
    ## find right char, right place
    for i, c in enumerate(guess):
        if c == ltarget[i]:
            result[i] = 'X'
            # Blank the slot so it cannot be matched a second time.
            ltarget[i] = ''
    ## find right char, wrong place
    for i, c in enumerate(guess):
        if result[i] == 'X':
            # Already an exact match; without this guard a repeated letter
            # elsewhere in the target would overwrite the 'X' with 'O'.
            continue
        if c in ltarget:
            result[i] = 'O'
            ltarget[ltarget.index(c)] = ''
    return ''.join(result)

现在

>>> cProfile.run('smart_guess(ws,ws)')
         502586 function calls in 0.503 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.503    0.503 <string>:1(<module>)
        3    0.000    0.000    0.000    0.000 random.py:224(_randbelow)
        3    0.000    0.000    0.000    0.000 random.py:256(choice)
        1    0.000    0.000    0.000    0.000 wordle_long.py:105(sample_targets)
    87516    0.232    0.000    0.288    0.000 wordle_long.py:12(compare)
        1    0.000    0.000    0.000    0.000 wordle_long.py:130(<listcomp>)
    87516    0.022    0.000    0.022    0.000 wordle_long.py:14(<listcomp>)
       51    0.208    0.004    0.503    0.010 wordle_long.py:74(distributions)
        1    0.000    0.000    0.503    0.503 wordle_long.py:85(smart_guess)
        1    0.000    0.000    0.503    0.503 {built-in method builtins.exec}
        5    0.000    0.000    0.000    0.000 {built-in method builtins.len}
       51    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        3    0.000    0.000    0.000    0.000 {method 'add' of 'set' objects}
        3    0.000    0.000    0.000    0.000 {method 'bit_length' of 'int' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        3    0.000    0.000    0.000    0.000 {method 'getrandbits' of '_random.Random' objects}
   152343    0.022    0.000    0.022    0.000 {method 'index' of 'list' objects}
    87516    0.013    0.000    0.013    0.000 {method 'join' of 'str' objects}
    87516    0.007    0.000    0.007    0.000 {method 'keys' of 'dict' objects}
       51    0.000    0.000    0.000    0.000 {method 'values' of 'dict' objects}

好一些。 现在， distributions是最慢的部分，所以我们应该优化它。 依此类推，直到达到可接受的执行时间。

减少 function 的运行时间

问题描述

2 个解决方案

解决方案1
0 2022-04-19 15:50:52

解决方案2
0 2022-04-19 16:09:08

减少 function 的运行时间

问题描述

2 个解决方案

解决方案1 0 2022-04-19 15:50:52

解决方案2 0 2022-04-19 16:09:08

解决方案1
0 2022-04-19 15:50:52

解决方案2
0 2022-04-19 16:09:08