[英]How to Use Gradient Descent Algorithm in Python or R
考慮 function: f(x,y) = x^2 + y^2 + 2y + 2
。 從 (x,y) = (3,4) 開始。 使用梯度下降算法編寫代碼(在 Python 或 R 中)以尋找局部最小值。 繪製每一步的函數值 f。 這個局部最小值也是全局最小值嗎(請提供一些解釋)?
最好的解決方案是在 PyTorch 中學習張量和 Auto-grad 作為開始: Auto-grad
然后你檢查並學習如何使用像 adam 和 SGD 這樣的 GD 並通過這個例子學習: beginner/PyTorch_with_example
一開始可能會讓人覺得無從下手,但 PyTorch 提供了非常有效的工具來解決這類問題。
不確定我的理解是否正確,我閱讀了一些關於梯度下降算法的資料,但沒有真正弄懂。 我所做的是使用 SymPy 庫找出該函數的最小值,並嘗試用一個從起點逐步向下的循環到達它(初始問題請參閱此處:從 Numpy 中的單個點實現梯度下降?)。
這里我的代碼和 output 圖片,嘗試再次運行它改變
result = diff(3,4,sep = 0.5, target=1)
值
並查看循環是否無限循環或達到中斷:
import numpy as np
import sympy as sym
from sympy.parsing.sympy_parser import parse_expr
from sympy import symbols, Eq, solve
import matplotlib.pyplot as plt
import random
class FunZ:
    """Symbolic wrapper around a two-variable function f(x, y).

    Parses the expression string once, and — unlike the original, which
    re-ran ``sym.diff`` on every ``derX``/``derY`` call — differentiates
    once in ``__init__`` so the grid loops below don't redo loop-invariant
    symbolic work thousands of times.
    """

    def __init__(self, f):
        # f is a sympy-parsable expression string in the symbols x and y,
        # e.g. 'x**2 + y**2 + 2*y + 2'.
        self.x, self.y = sym.symbols('x y')
        self.f = parse_expr(f)
        # Partial derivatives computed once; derX/derY only substitute.
        self.dx = sym.diff(self.f, self.x)
        self.dy = sym.diff(self.f, self.y)

    def evalu(self, xx, yy):
        """Return f(xx, yy) as a Python float."""
        return float(self.f.subs({self.x: xx, self.y: yy}).evalf())

    def derX(self, xx, yy):
        """Return df/dx evaluated at (xx, yy) as a float."""
        return float(self.dx.subs({self.x: xx, self.y: yy}).evalf())

    def derY(self, xx, yy):
        """Return df/dy evaluated at (xx, yy) as a float."""
        return float(self.dy.subs({self.x: xx, self.y: yy}).evalf())

    def derXY(self, xx, yy):
        """Return the gradient [df/dx, df/dy] at (xx, yy)."""
        # derX/derY already return floats; no need to convert again.
        return [self.derX(xx, yy), self.derY(xx, yy)]

    def minim(self):
        """Solve df/dx = 0, df/dy = 0 and return sympy's solution.

        NOTE(review): for this linear system ``solve`` returns a dict
        {x: ..., y: ...}; callers index it with ``funz.x`` / ``funz.y``.
        Confirm the return shape before using other (non-linear) f.
        """
        print('dx : ', self.dx)
        print('dy : ', self.dy)
        eq1 = Eq(self.dx, 0)
        eq2 = Eq(self.dy, 0)
        solu = solve((eq1, eq2), (self.x, self.y), dict=False)
        print(solu, type(solu))
        return solu
# Sample grid covering [-3, 3] x [-3, 3], 100 points per axis.
XX = np.linspace(-3, 3, 100)
YY = np.linspace(-3, 3, 100)
# The function under study: f(x, y) = x^2 + y^2 + 2y + 2.
funz = FunZ('x**2 + y**2 + 2*y + 2')
# (x, y, f(x, y)) triples for every grid point.
# NOTE(review): 10,000 symbolic subs() calls per list — slow; sympy.lambdify
# would vectorise this, kept as-is for clarity.
ij = [(x, y, funz.evalu(x, y)) for x in XX for y in YY]
arr = np.array(ij, dtype=float)
# print(arr, arr.size, arr.shape, arr.dtype)
# Same grid evaluated on the partial derivative df/dx ...
der_x = [(a, b, funz.derX(a, b)) for a in XX for b in YY]
derX = np.array(der_x)
# print(derX, derX.size, derX.shape, derX.dtype)
# ... and on df/dy.
der_y = [(a, b, funz.derY(a, b)) for a in XX for b in YY]
derY = np.array(der_y)
# print(derY, derY.size, derY.shape, derY.dtype)
# Split the columns back out for plotting.
x = arr[:, 0]
y = arr[:, 1]
data = arr[:, 2]
# Three 3-D surfaces: f itself (top-left) and both partials (bottom row).
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(221, projection="3d", title="f(x,y)")
ax.plot_trisurf(x, y, data, color="red", alpha=0.5)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax2 = fig.add_subplot(223, projection="3d", title=r'$D_{X}$'+'f(x,y)')
ax2.plot_trisurf(
x, y, derX[:, 2], color="blue", alpha=0.2)
ax3 = fig.add_subplot(224, projection="3d", title=r'$D_{Y}$'+'f(x,y)')
ax3.plot_trisurf(
x, y, derY[:, 2], color="green", alpha=0.2)
plt.show()
# Analytic minimum from solving the first-order conditions.
# NOTE(review): indexing assumes solve() returned a dict {x: ..., y: ...},
# which holds for this linear system — confirm for other functions.
dict_min = funz.minim()
print('dict_min[funz.x] : ', dict_min[funz.x])
print('dict_min[funz.y] : ', dict_min[funz.y])
def diff(x, y, func=None, sep=0.5, target=1):
    """Random-walk grid descent on ``func`` starting at (x, y).

    At each step, evaluates the 3x3 grid of points spaced ``sep`` around
    the current position, keeps those that strictly lower the function
    value, and jumps to one of them at random.  Stops when the value
    drops to ``target`` or no neighbour improves (grid-local minimum).

    Parameters
    ----------
    x, y : float
        Starting point.
    func : object with an ``evalu(x, y) -> float`` method, optional.
        Defaults to ``FunZ('x**2 + y**2 + 2*y + 2')``.  Built lazily so
        the default is not constructed eagerly at definition time.
    sep : float
        Grid spacing of one step.
    target : float
        Stop once ``func.evalu(...) <= target``.

    Returns
    -------
    (xx, yy, cnt, sep, value)
        Final point, number of iterations, step size, and the function
        value at the final point.
    """
    if func is None:
        func = FunZ('x**2 + y**2 + 2*y + 2')
    cnt = 0
    xx, yy = x, y
    current = func.evalu(xx, yy)
    # Check before stepping, so a start already at/below target returns
    # immediately with cnt == 0 (the original only tested after a move).
    while current > target:
        cnt += 1
        # BUGFIX: candidates must beat the *current* value, not the value
        # at the starting point (x, y) as the original did — comparing to
        # the start let the walk wander back uphill.
        candidates = []
        for nx in (xx - sep, xx, xx + sep):
            for ny in (yy - sep, yy, yy + sep):
                val = func.evalu(nx, ny)
                if val < current:
                    candidates.append((nx, ny, val))
        if not candidates:
            # Grid-local minimum above target: stop cleanly instead of
            # letting random.choice raise IndexError on an empty list.
            break
        # Random improving move; sorting candidates by value and taking
        # the first would give steepest descent instead.
        xx, yy, current = random.choice(candidates)
    return (xx, yy, cnt, sep, current)
# Run the random-walk descent from (3, 4) with step 0.5, stopping once f
# drops to 1 (the analytic minimum value of this f is 1 at (0, -1)).
result = diff(3,4,sep = 0.5, target=1)
# Report: analytic minimum vs. the point the walk actually reached.
print('\nfunction minimum at x: ',dict_min[funz.x],' y : ',dict_min[funz.y],' value ; ',
funz.evalu(dict_min[funz.x], dict_min[funz.y]))
print('\nstarting at 3,4 got : x: ', result[0], ' y : ' , result[1],'value :', result[4] ,' in ', result[2],' cycles, using '
, result[3],' steps with random choice, should try choosing steeper gradient and see what happens')
函數及其偏導數的圖形:
output for result = diff(3,4,sep = 0.5, target=1)
using random.choice
........
[(-1.0, -2.0, -32.0), (-1.0, -1.5, -32.75), (-1.0, -1.0, -33.0), (-0.5, -2.0, -32.75), (-0.5, -1.5, -33.5), (-0.5, -1.0, -33.75), (0.0, -2.0, -33.0), (0.0, -1.5, -33.75), (0.0, -1.0, -34.0)]
(0.0, -1.0, -34.0) : (0.0, -1.0) cnt : 496 value : 1.0
function minimum at x: 0 y : -1 value ; 1.0
starting at 3,4 got : x: 0.0 y : -1.0 value : 1.0 in 496 cycles, using 0.5 steps with random choice, should try choosing steeper gradient and see what happens
我也在嘗試在 matplotlib 子圖上寫軌跡,但需要先弄清楚如何。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.