[英]Minimax algorithm for Tic Tac Toe Python
我有點理解minimax算法是如何為Tic Tac Toe python工作的,但我不知道如何在Python中實際編碼...這是我到目前為止所做的:
from copy import deepcopy
# The asker's original attempt at a tic-tac-toe player with a minimax search.
# Squares are keyed 0-8 in a dict; an empty square holds None.
# NOTE(review): this listing has lost its indentation, so the nesting of the
# statements below cannot be read off the text alone.
class TicTacToeBrain :
# Creates the (still empty) square dict, a dict used as a board snapshot, and
# the eight index triples that form a winning line. `player` is not used here.
def __init__(self, player = "x") :
self._squares = {}
self._copySquares = {}
self._winningCombos = (
[0, 1, 2], [3, 4, 5], [6, 7, 8],
[0, 3, 6], [1, 4, 7], [2, 5, 8],
[0, 4, 8], [2, 4, 6])
# Sets squares 0-8 to None (empty) and prints the raw dict.
def createBoard(self) :
for i in range(9) :
self._squares[i] = None
print(self._squares)
# Prints the nine squares as three rows of three.
def showBoard(self) :
print(self._squares[0], self._squares[1], self._squares[2])
print(self._squares[3], self._squares[4], self._squares[5])
print(self._squares[6], self._squares[7], self._squares[8])
# Collects the indices of squares that are still None; the list is also kept
# on the instance as `_availableMoves`.
def getAvailableMoves(self) :
self._availableMoves = []
for i in range(9) :
if self._squares[i] == None :
self._availableMoves.append(i)
return self._availableMoves
# Writes `player` into square `position`, then prints the board.
def makeMove(self, position, player) :
self._squares[position] = player
self.showBoard()
# True when no square is None or when getWinner() reports a result.
def complete(self) :
if None not in self._squares.values() :
return True
if self.getWinner() != None :
return True
return False
# Returns "x" or "o" if that player fills one of the winning triples,
# "tie" if no square is None, otherwise None.
def getWinner(self) :
for player in ("x", "o") :
for combos in self._winningCombos :
if self._squares[combos[0]] == player and self._squares[combos[1]] == player and self._squares[combos[2]] == player :
return player
if None not in self._squares.values() :
return "tie"
return None
# Returns "o" for "x" and "x" for anything else.
def getEnemyPlayer(self, player) :
if player == "x" :
return "o"
return "x"
# Recursive minimax attempt. `node` is the board object being searched,
# `player` the side to move, `depth` the recursion depth, and `first` marks
# the outermost call.
def minimax(self, node, player, depth = 0, first = True) :
# On the outermost call only: seed `best` with 0 and snapshot the board.
if first :
best = 0
self._copySquares = deepcopy(self._squares)
# Finished position: restore the snapshot and score it as -1 - depth when
# "x" has won, 0 for a tie, and 1 + depth when "o" has won.
if node.complete() :
if node.getWinner() == "x" :
self._squares = self._copySquares
return -1 - depth
elif node.getWinner() == "tie" :
self._squares = self._copySquares
return 0
elif node.getWinner() == "o" :
self._squares = self._copySquares
return 1 + depth
best = None
# Try each open square: play it, recurse for the opponent, and keep the
# larger value for "o" or the smaller value for the other player.
for move in node.getAvailableMoves() :
depth += 1
node.makeMove(move, player)
print()
val = self.minimax(node, node.getEnemyPlayer(player), depth, first = False)
print(val)
if player == "o" :
if val > best :
best = val
else :
if val < best :
best = val
return best
print()
print()
# Prints the snapshot dict taken by minimax.
def printCopy(self) :
print(self._copySquares)
但是，它從來沒有打印出所有的場景……請幫幫我！這是一個週一要交的項目。
一些問題:
執行在 `for` 循環的第一次迭代中就通過 `return` 跳出了循環：這為時過早，因為你永遠不會測試其他任何可用的移動。該 `return` 應該在循環結束之後執行。
在 `for` 循環的每次迭代中遞增 depth 的值是錯誤的。應該改為把 `depth+1` 傳給遞歸調用，這樣從遞歸返回之後，當前層仍然保持原來的深度。
在遞歸調用之前走的那一步，必須在調用返回後立即撤銷，否則 `for` 循環的下一次迭代就不是從同一個局面開始了。
`best` 的值需要在每次調用 minimax 方法時都初始化，而不僅僅是在遞歸樹的頂層。這個初始值不應該是 0，因為當前玩家能得到的最佳值可能低於 0。因此，需要把它初始化為對該玩家而言最差的值。
minimax 方法沒有返回最佳移動，只返回了評估值。既然這個方法的全部目的就是告訴你該走哪一步，那麼兩者都需要。因此，讓方法返回一個包含兩個值的元組：評估值，以及產生該值的移動。
一些非關鍵問題:
因為你希望推遲不可避免的失敗、或者加快必勝的到來，所以在玩家獲勝時計算評估值的公式，應該讓勝負出現得越晚（深度越大），值就越接近 0。因此，該公式需要修改。
由於應該通過撤銷移動來恢復棋盤，就不需要再複製棋盤，也不需要 `_copySquares` 了。只要代碼都寫對了，最外層的 minimax 調用結束後，棋盤應該與調用之前的狀態完全相同。
如果空方格不用 `None`，而是用單個字符（例如 "."）表示，棋盤打印出來會更好看。因此，凡是引用空方格值的地方，都要改用該字符。
你在代碼裡到處用 `print()` 來分隔輸出。只在 `showBoard` 方法中放一個就夠了，其餘的都可以去掉。
鑒於以上幾點，minimax 方法既不需要 `node` 參數，也不需要 `first` 參數。
這是一個帶註釋的更正版本。我把原來的代碼行留在原處，但在需要的地方把它們註釋掉了。
# *** not needed:
# from copy import deepcopy
class TicTacToeBrain :
    """Tic-tac-toe board with a minimax search for the best move.

    Squares are numbered 0-8, row by row. Each square holds "x", "o", or
    "." when it is empty.
    """

    def __init__(self, player = "x") :
        """Set up an empty board.

        Args:
            player: Accepted for compatibility; no method reads it yet.
        """
        # Fill the board right away so that a fresh instance is usable (and
        # does not report itself as complete) before createBoard() is called.
        self._squares = {i: "." for i in range(9)}
        self._copySquares = {}
        self._winningCombos = (
            [0, 1, 2], [3, 4, 5], [6, 7, 8],
            [0, 3, 6], [1, 4, 7], [2, 5, 8],
            [0, 4, 8], [2, 4, 6])

    def createBoard(self) :
        """Reset all nine squares to empty and print the raw square mapping."""
        for i in range(9) :
            # *** use a single character, ... easier to print
            self._squares[i] = "."
        print(self._squares)

    def showBoard(self) :
        """Print a blank line followed by the board as three rows of three."""
        # *** add empty line here, instead of in minimax
        print ()
        print(self._squares[0], self._squares[1], self._squares[2])
        print(self._squares[3], self._squares[4], self._squares[5])
        print(self._squares[6], self._squares[7], self._squares[8])

    def getAvailableMoves(self) :
        """Return the indices of the empty squares, in ascending order.

        The list is also stored on the instance as `_availableMoves`.
        """
        # *** see above
        self._availableMoves = [i for i in range(9) if self._squares[i] == "."]
        return self._availableMoves

    def makeMove(self, position, player) :
        """Put `player` on square `position` and print the board.

        Passing "." as `player` empties the square again, which is how
        minimax takes a move back.
        """
        self._squares[position] = player
        self.showBoard()

    def complete(self) :
        """Return True when the board is full or somebody has won."""
        # *** see above
        if "." not in self._squares.values() :
            return True
        return self.getWinner() is not None

    def getWinner(self) :
        """Return "x" or "o" for a winner, "tie" for a full board, else None."""
        for player in ("x", "o") :
            for combo in self._winningCombos :
                if all(self._squares[i] == player for i in combo) :
                    return player
        # *** see above
        if "." not in self._squares.values() :
            return "tie"
        return None

    def getEnemyPlayer(self, player) :
        """Return "o" for "x", and "x" for anything else."""
        if player == "x" :
            return "o"
        return "x"

    # *** no need for `node` argument, nor `first`
    # *** use `self` instead of `node` in all this method
    def minimax(self, player, depth = 0) :
        """Search the game tree from the current position.

        Every tried move is played and taken back through makeMove(), so
        the board is printed for each explored position, followed by the
        value found for it. The board is left as it was found.

        Args:
            player: The side to move, "x" or "o".
            depth: Number of moves already played in this search.

        Returns:
            A tuple (value, move). The value is positive when "o" wins,
            negative when "x" wins and 0 for a tie; it lies closer to 0 the
            deeper in the search the win occurs. The move is the square
            that yields this value, or None when the game is already over.
        """
        # *** not needed
        # if first :
        #     best = 0
        # *** not needed
        #     self._copySquares = deepcopy(self._squares)
        # *** always start with initilisation of `best`, but with worst possible value
        #     for this player
        if player == "o":
            best = -10
        else:
            best = 10
        # Bind the move up front so the final return never meets an unbound name.
        bestMove = None
        # getWinner() is None exactly when the game is still in progress, so
        # one call replaces complete() plus the repeated getWinner() calls.
        winner = self.getWinner()
        if winner == "x" :
            # *** don't do this, you may still need the position to try other moves
            # self._squares = self._copySquares
            # *** value should be closer to zero for greater depth!
            # *** expect tuple return value
            return -10 + depth, None
        elif winner == "tie" :
            # self._squares = self._copySquares
            # *** expect tuple return value
            return 0, None
        elif winner == "o" :
            # self._squares = self._copySquares
            # *** value should be closer to zero for greater depth!
            # *** expect tuple return value
            return 10 - depth, None
        # *** Execution can never get here
        # best = None
        for move in self.getAvailableMoves() :
            # *** don't increase depth in each iteration, instead pass depth+1 to
            #     the recursive call
            # depth += 1
            self.makeMove(move, player)
            # *** pass depth+1, no need for passing `node` nor `first`.
            # *** expect tuple return value
            val, _ = self.minimax(self.getEnemyPlayer(player), depth+1)
            print(val)
            # *** undo last move
            self.makeMove(move, ".")
            if player == "o" :
                if val > best :
                    # *** Also keep track of the actual move
                    best, bestMove = val, move
            else :
                if val < best :
                    # *** Also keep track of the actual move
                    best, bestMove = val, move
            # *** don't interrupt the loop here!
            # return best
            # *** this is dead code:
            # print()
            # print()
        # *** Also keep track of the actual move
        return best, bestMove

    def printCopy(self) :
        """Print the `_copySquares` mapping (no longer filled by minimax)."""
        print(self._copySquares)
以下是如何使用該類的示例:
# Example: "o" takes the centre, "x" answers on the middle-left square, and
# minimax is then asked for o's best reply.
game = TicTacToeBrain()
game.createBoard()
for square, mark in ((4, "o"), (3, "x")) :
    game.makeMove(square, mark)
val, bestMove = game.minimax("o")
print('best move', bestMove) # --> 0 is a winning move.
可以在 eval.in 上看它運行……需要耐心等一會兒。
我不會為此提供代碼,但您可以:
用 `self.player` 記錄當前輪到誰走。這樣就不必把玩家作為參數傳給 minimax，可以避免出錯。這也讓構造函數的參數派上用場——目前你並沒有用它做任何事情。
添加一個 `bestMove` 方法，它只需調用 minimax，但只返回最佳移動，而不返回評估值。這樣用起來更方便。
使用 alpha-beta 剪枝，以便在明顯無法改善遞歸樹中已經得到的值時，停止評估其餘的移動。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.