[英]Float object is not iterable [Python]
嘿,我是 Python 編程的新手,我的一個函數出了問題,但我並沒有使用浮點數。 它基本上是求解蛇梯棋(Snakes and Ladders)遊戲的值迭代算法,而我遇到了這個錯誤:在算法的 Q 函數中出現 'float' object is not iterable(float 對象不可迭代)。 我知道這個錯誤很簡單,所以希望你能輕松地幫助我。 非常感謝。
class Snakes_Ladders(object):
    """Transition model of a Snakes and Ladders game over states 1..N."""

    def __init__(self, N):
        # N is the last state; isEnd() compares against it.
        self.N = N

    def Begin(self):
        """Return the initial state, 1."""
        return 1

    def isEnd(self, state):
        """Return True when `state` equals the last state N."""
        return state == self.N

    def discount(self):
        """Return the discount factor, 1.0."""
        return 1.

    def states(self):
        """Return the range of states 1..N inclusive."""
        return range(1, self.N + 1)

    def actions(self, state):
        """Return the list of action names offered in `state`."""
        result = []
        # NOTE(review): `state + 1 <= self.N` already implies
        # `state <= self.N`, so the extra disjuncts never change the outcome;
        # both conditions reduce to `state <= self.N`.
        if state <= self.N or state + 1 <= self.N:
            result.append('Security')
        if state <= self.N or state + 1 <= self.N or state + 2 <= self.N:
            result.append('Normal')
        return result

    def TransiMat(self, state, action):
        """Return the transitions for taking `action` in `state`.

        Returns a list of (NewState, Proba, Reward) tuples. Every reward
        is -1.

        NOTE(review): NewState is not clamped, so it can be 0 or exceed N,
        which are not members of states() -- confirm how callers handle it.
        """
        result = []
        if action == 'Security' and state != 3:
            result.append((state, 0.5, -1))
            result.append((state + 1, 0.5, -1))
        if action == 'Security' and state == 3:
            result.append((state, 0.5, -1))
            result.append((state + 1, 0.25, -1))
            result.append((state + 8, 0.25, -1))
        if action == 'Normal' and state != 3:
            result.append((state, 0.33, -1))
            result.append((state + 1, 0.33, -1))
            result.append((state + 2, 0.33, -1))
        # Fast lane
        if action == 'Normal' and state == 3:
            result.append((state, 0.33, -1))
            result.append((state + 1, 0.165, -1))
            result.append((state + 8, 0.165, -1))
        # Traps
        # NOTE(review): states 5, 7 and 9 also satisfy the `state != 3`
        # 'Normal' branch above, so the trap transitions below are appended
        # in addition to the regular ones -- confirm whether they were meant
        # to replace them.
        if action == 'Normal' and state == 5:
            result.append((0, 0.33, -1))
            result.append((state + 1, 0.33, -1))
            result.append((state + 2, 0.33, -1))
        if action == 'Normal' and state == 7:
            result.append((state - 3, 0.33, -1))
            result.append((state + 1, 0.33, -1))
            result.append((state + 2, 0.33, -1))
        if action == 'Normal' and state == 9:
            result.append((state, 0.33, -1))
            result.append((state + 1, 0.33, -1))
            result.append((state + 2, 0.33, -1))
        return result
def ValueIteration(game):
    """Run value iteration on `game`, then derive a greedy policy.

    `game` must provide states(), isEnd(state), actions(state),
    TransiMat(state, action) and discount(). The value table `V` and the
    policy `pi` are built locally; nothing is returned.

    As posted, this function raises TypeError on the first call to Q
    (see the comment on its return statement).
    """
    V = {}  # Initialization
    for state in game.states():
        V[state] = 0.

    def Q(state, action):
        for result in game.TransiMat(state, action):
            newState = result[0]
            prob = result[1]
            reward = result[2]
            print(type(newState))
        # The error is raised here: TypeError: 'float' object is not
        # iterable. sum() expects an iterable, but this expression is a
        # single float.
        return sum(prob*(reward + game.discount()*V[newState]))

    while True:
        # Compute new values given the old values
        newV = {}
        for state in game.states():
            if game.isEnd(state):
                newV[state] = 0.
            else:
                newV[state] = max(Q(state, action) for action in game.actions(state))
        # Convergence
        if max(abs(V[state] - newV[state]) for state in game.states()) < 1e-10:
            break
        V = newV

    # Policy
    pi = {}
    for state in game.states():
        if game.isEnd(state):
            pi[state] = 'none'
        else:
            # max over (value, action) pairs; keep the action of the best pair.
            pi[state] = max((Q(state, action), action) for action in game.actions(state))[1]
# Build a game with N=15, print the transitions for one sample move,
# then run value iteration on it.
game = Snakes_Ladders(N=15)
# print(game.actions(3))
print(game.TransiMat(1, 'Security'))
ValueIteration(game)
以下是回溯(traceback):
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-188-35da2c569c07> in <module>
104 print(game.TransiMat(1,'Security'))
105
--> 106 ValueIteration(game)
107 raise KeyError(message)
108
<ipython-input-188-35da2c569c07> in ValueIteration(game)
85 newV[state]=0.
86 else:
---> 87 newV[state] = max(Q(state,action) for action in game.actions(state))
88 #Convergence
89 if max(abs(V[state]- newV[state]) for state in game.states())<1e-10:
<ipython-input-188-35da2c569c07> in <genexpr>(.0)
85 newV[state]=0.
86 else:
---> 87 newV[state] = max(Q(state,action) for action in game.actions(state))
88 #Convergence
89 if max(abs(V[state]- newV[state]) for state in game.states())<1e-10:
<ipython-input-188-35da2c569c07> in Q(state, action)
75 reward=result[2]
76 print(type(newState))
---> 77 return sum(prob*(reward + game.discount()*V[newState]))
78
79 while True:
TypeError: 'float' object is not iterable
sum (iterable, /, start=0)
從左到右對 start 和 iterable 的項求和並返回總數。 iterable 的項通常是數字,並且起始值不允許是字符串。
在 sum 的調用中把要相加的值放進一個列表,應該可以解決問題:
def Q(state, action):
    """Return the expected value of taking `action` in `state`.

    Each (newState, prob, reward) transition from game.TransiMat
    contributes prob * (reward + game.discount() * V[newState]). The terms
    are collected in a list so that sum() receives an iterable and adds up
    every transition, not just one.

    `game` and `V` are free names resolved from the surrounding scope.
    """
    return sum([
        prob * (reward + game.discount() * V[newState])
        for newState, prob, reward in game.TransiMat(state, action)
    ])
或者,如果只需要單個值,那麼使用 sum 沒有意義,直接返回該值即可:
def Q(state, action):
    # `...` stands for the elided part of the body, which is expected to
    # bind prob, reward and newState before the return below.
    ...
    # Return the single value directly instead of passing it to sum().
    return prob*(reward + game.discount()*V[newState])
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.