簡體   English   中英

Float 對象不可迭代 [Python]

[英]Float object is not iterable [Python]

嘿,我是 Python 編程的新手,我在函數中遇到了問題,但我沒有浮點數。 它基本上是解決蛇和梯子游戲的值迭代算法,我有這個錯誤:在我的算法的 Q 函數中,浮動對象不可迭代。 我知道這個錯誤是微不足道的,所以我希望你能輕松地幫助我。 非常感謝。

class Snakes_Ladders(object):


    def __init__(self, N):
        self.N=N

    def Begin(self):
        return 1


    def isEnd(self, state):

        return state == self.N

    def discount(self):
        return 1.

    def states(self):
        return range(1, self.N+1)

    def actions(self, state):
        result=[]
        if state<=self.N or state+1<=self.N:
            result.append('Security')
        if state<=self.N or state+1<=self.N or state+2<=self.N:
            result.append('Normal')
        return result

    def TransiMat(self, state, action):
        result = []
    #Returns a list with (NewState,Proba, Reward)
        if action=='Security'and state!=3:
            result.append((state, 0.5, -1))
            result.append((state+1, 0.5, -1))
        if action=='Security' and state==3:
            result.append((state, 0.5, -1))
            result.append((state+1, 0.25, -1))
            result.append((state+8, 0.25, -1))
        if action=='Normal' and state!=3:
            result.append((state, 0.33, -1))
            result.append((state+1, 0.33, -1))
            result.append((state+2, 0.33, -1))
        #fast lane
        if action=='Normal' and state==3:
            result.append((state,0.33,-1))
            result.append((state+1,0.165,-1))
            result.append((state+8,0.165,-1))
          ##Traps
        if action=='Normal' and state==5:
            result.append((0, 0.33, -1))
            result.append((state+1, 0.33, -1))
            result.append((state+2, 0.33, -1))
        if action=='Normal' and state==7:
            result.append((state-3, 0.33, -1))
            result.append((state+1, 0.33, -1))
            result.append((state+2, 0.33, -1))
        if action=='Normal' and state==9:
            result.append((state, 0.33, -1))
            result.append((state+1, 0.33, -1))
            result.append((state+2, 0.33, -1))
        return result


def ValueIteration(game):

    V={} #Initialization
    for state in game.states():
        V[state] = 0.

    def Q(state,action):
        i = 0.
        for result in game.TransiMat(state, action):
                newState=result[0]
                prob=result[1]
                reward=result[2]
                print(type(newState))
        **return sum(prob*(reward + game.discount()*V[newState]))** #The error is called here : Float 
                                                                      object is not iterable

    while True:
        #Compute new values given the old values

        newV = {}
        for state in game.states():
            if game.isEnd(state):
                newV[state]=0.
            else:
                newV[state] = max(Q(state,action) for action in game.actions(state))
  #Convergence
        if max(abs(V[state]- newV[state]) for state in game.states())<1e-10:
            break
        V=newV

  #Policy

    pi={}
    for state in game.states():
        if game.isEnd[state]:
            pi[state]= 'none'
        else:
            pi[state] = max((Q(state,action), action) for action in game.actions(state))[1]

game=Snakes_Ladders(N=15)
#print(game.actions(3))
print(game.TransiMat(1,'Security'))

ValueIteration(game)

你有回溯:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-188-35da2c569c07> in <module>
    104 print(game.TransiMat(1,'Security'))
    105 
--> 106 ValueIteration(game)
    107 raise KeyError(message)
    108 

<ipython-input-188-35da2c569c07> in ValueIteration(game)
     85                 newV[state]=0.
     86             else:
---> 87                 newV[state] = max(Q(state,action) for action in game.actions(state))
     88   #Convergence
     89         if max(abs(V[state]- newV[state]) for state in game.states())<1e-10:

<ipython-input-188-35da2c569c07> in <genexpr>(.0)
     85                 newV[state]=0.
     86             else:
---> 87                 newV[state] = max(Q(state,action) for action in game.actions(state))
     88   #Convergence
     89         if max(abs(V[state]- newV[state]) for state in game.states())<1e-10:

<ipython-input-188-35da2c569c07> in Q(state, action)
     75                 reward=result[2]
     76                 print(type(newState))
---> 77         return sum(prob*(reward + game.discount()*V[newState]))
     78 
     79     while True:

TypeError: 'float' object is not iterable

sum (iterable, /, start=0)

從左到右對 start 和 iterable 的項求和並返回總數。 iterable 的項通常是數字,並且起始值不允許是字符串。

sum添加一個默認列表應該可以解決問題 -

def Q(state,action):
    i = 0.
    for result in game.TransiMat(state, action):
        newState=result[0]
        prob=result[1]
        reward=result[2]
        print(type(newState))
    return sum([prob*(reward + game.discount()*V[newState])])

或者,如果只需要單個值,那么使用sum沒有意義,只需返回值

def Q(state,action):
    ...
    return  prob*(reward + game.discount()*V[newState])

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM