[英]How to Use Gradient Descent Algorithm in Python or R
考慮 function: f(x,y) = x^2 + y^2 + 2y + 2
。 從 (x,y) = (3,4) 開始。 使用梯度下降算法編寫代碼(在 Python 或 R 中)以尋找局部最小值。 繪製每一步的函數值 f。 這個局部最小值也是全局最小值嗎(請提供一些解釋)?
最好的解決方案是在 PyTorch 中學習張量和 Auto-grad 作為開始: Auto-grad
然后你檢查並學習如何使用像 adam 和 SGD 這樣的 GD 並通過這個例子學習: beginner/PyTorch_with_example
一開始可能會讓人覺得無從下手,但 PyTorch 提供了非常有效的工具來解決這類問題。
不確定我的理解是否正確,我閱讀了一些關於梯度下降算法的資料,但沒有真正弄懂。 我所做的是使用 SymPy 庫找出該函數的最小值,並嘗試用一個從起點逐步向下的循環到達它(初始問題請參閱此處:從 Numpy 中的單個點實現梯度下降?)。
這里我的代碼和 output 圖片,嘗試再次運行它改變
result = diff(3,4,sep = 0.5, target=1)
值
並查看循環是否無限循環或達到中斷:
import numpy as np
import sympy as sym
from sympy.parsing.sympy_parser import parse_expr
from sympy import symbols, Eq, solve
import matplotlib.pyplot as plt
import random
class FunZ:
    """Symbolic wrapper around a two-variable function f(x, y).

    Parses the expression string once, and — unlike the original, which
    re-ran ``sym.diff`` on every ``derX``/``derY`` call — differentiates
    once in ``__init__`` so the grid loops below don't redo loop-invariant
    symbolic work thousands of times.
    """

    def __init__(self, f):
        # f is a sympy-parsable expression string in the symbols x and y,
        # e.g. 'x**2 + y**2 + 2*y + 2'.
        self.x, self.y = sym.symbols('x y')
        self.f = parse_expr(f)
        # Partial derivatives computed once; derX/derY only substitute.
        self.dx = sym.diff(self.f, self.x)
        self.dy = sym.diff(self.f, self.y)

    def evalu(self, xx, yy):
        """Return f(xx, yy) as a Python float."""
        return float(self.f.subs({self.x: xx, self.y: yy}).evalf())

    def derX(self, xx, yy):
        """Return df/dx evaluated at (xx, yy) as a float."""
        return float(self.dx.subs({self.x: xx, self.y: yy}).evalf())

    def derY(self, xx, yy):
        """Return df/dy evaluated at (xx, yy) as a float."""
        return float(self.dy.subs({self.x: xx, self.y: yy}).evalf())

    def derXY(self, xx, yy):
        """Return the gradient [df/dx, df/dy] at (xx, yy)."""
        # derX/derY already return floats; no need to convert again.
        return [self.derX(xx, yy), self.derY(xx, yy)]

    def minim(self):
        """Solve df/dx = 0, df/dy = 0 and return sympy's solution.

        NOTE(review): for this linear system ``solve`` returns a dict
        {x: ..., y: ...}; callers index it with ``funz.x`` / ``funz.y``.
        Confirm the return shape before using other (non-linear) f.
        """
        print('dx : ', self.dx)
        print('dy : ', self.dy)
        eq1 = Eq(self.dx, 0)
        eq2 = Eq(self.dy, 0)
        solu = solve((eq1, eq2), (self.x, self.y), dict=False)
        print(solu, type(solu))
        return solu
# Sample grid covering [-3, 3] x [-3, 3], 100 points per axis.
XX = np.linspace(-3, 3, 100)
YY = np.linspace(-3, 3, 100)
# The function under study: f(x, y) = x^2 + y^2 + 2y + 2.
funz = FunZ('x**2 + y**2 + 2*y + 2')
# (x, y, f(x, y)) triples for every grid point.
# NOTE(review): 10,000 symbolic subs() calls per list — slow; sympy.lambdify
# would vectorise this, kept as-is for clarity.
ij = [(x, y, funz.evalu(x, y)) for x in XX for y in YY]
arr = np.array(ij, dtype=float)
# print(arr, arr.size, arr.shape, arr.dtype)
# Same grid evaluated on the partial derivative df/dx ...
der_x = [(a, b, funz.derX(a, b)) for a in XX for b in YY]
derX = np.array(der_x)
# print(derX, derX.size, derX.shape, derX.dtype)
# ... and on df/dy.
der_y = [(a, b, funz.derY(a, b)) for a in XX for b in YY]
derY = np.array(der_y)
# print(derY, derY.size, derY.shape, derY.dtype)
# Split the columns back out for plotting.
x = arr[:, 0]
y = arr[:, 1]
data = arr[:, 2]
# Three 3-D surfaces: f itself (top-left) and both partials (bottom row).
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(221, projection="3d", title="f(x,y)")
ax.plot_trisurf(x, y, data, color="red", alpha=0.5)
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax2 = fig.add_subplot(223, projection="3d", title=r'$D_{X}$'+'f(x,y)')
ax2.plot_trisurf(
x, y, derX[:, 2], color="blue", alpha=0.2)
ax3 = fig.add_subplot(224, projection="3d", title=r'$D_{Y}$'+'f(x,y)')
ax3.plot_trisurf(
x, y, derY[:, 2], color="green", alpha=0.2)
plt.show()
# Analytic minimum from solving the first-order conditions.
# NOTE(review): indexing assumes solve() returned a dict {x: ..., y: ...},
# which holds for this linear system — confirm for other functions.
dict_min = funz.minim()
print('dict_min[funz.x] : ', dict_min[funz.x])
print('dict_min[funz.y] : ', dict_min[funz.y])
def diff(x, y, func=None, sep=0.5, target=1):
    """Random-walk grid descent on ``func`` starting at (x, y).

    At each step, evaluates the 3x3 grid of points spaced ``sep`` around
    the current position, keeps those that strictly lower the function
    value, and jumps to one of them at random.  Stops when the value
    drops to ``target`` or no neighbour improves (grid-local minimum).

    Parameters
    ----------
    x, y : float
        Starting point.
    func : object with an ``evalu(x, y) -> float`` method, optional.
        Defaults to ``FunZ('x**2 + y**2 + 2*y + 2')``.  Built lazily so
        the default is not constructed eagerly at definition time.
    sep : float
        Grid spacing of one step.
    target : float
        Stop once ``func.evalu(...) <= target``.

    Returns
    -------
    (xx, yy, cnt, sep, value)
        Final point, number of iterations, step size, and the function
        value at the final point.
    """
    if func is None:
        func = FunZ('x**2 + y**2 + 2*y + 2')
    cnt = 0
    xx, yy = x, y
    current = func.evalu(xx, yy)
    # Check before stepping, so a start already at/below target returns
    # immediately with cnt == 0 (the original only tested after a move).
    while current > target:
        cnt += 1
        # BUGFIX: candidates must beat the *current* value, not the value
        # at the starting point (x, y) as the original did — comparing to
        # the start let the walk wander back uphill.
        candidates = []
        for nx in (xx - sep, xx, xx + sep):
            for ny in (yy - sep, yy, yy + sep):
                val = func.evalu(nx, ny)
                if val < current:
                    candidates.append((nx, ny, val))
        if not candidates:
            # Grid-local minimum above target: stop cleanly instead of
            # letting random.choice raise IndexError on an empty list.
            break
        # Random improving move; sorting candidates by value and taking
        # the first would give steepest descent instead.
        xx, yy, current = random.choice(candidates)
    return (xx, yy, cnt, sep, current)
# Run the random-walk descent from (3, 4) with step 0.5, stopping once f
# drops to 1 (the analytic minimum value of this f is 1 at (0, -1)).
result = diff(3,4,sep = 0.5, target=1)
# Report: analytic minimum vs. the point the walk actually reached.
print('\nfunction minimum at x: ',dict_min[funz.x],' y : ',dict_min[funz.y],' value ; ',
funz.evalu(dict_min[funz.x], dict_min[funz.y]))
print('\nstarting at 3,4 got : x: ', result[0], ' y : ' , result[1],'value :', result[4] ,' in ', result[2],' cycles, using '
, result[3],' steps with random choice, should try choosing steeper gradient and see what happens')
函數及其偏導數的圖形:
output for result = diff(3,4,sep = 0.5, target=1)
using random.choice
........
[(-1.0, -2.0, -32.0), (-1.0, -1.5, -32.75), (-1.0, -1.0, -33.0), (-0.5, -2.0, -32.75), (-0.5, -1.5, -33.5), (-0.5, -1.0, -33.75), (0.0, -2.0, -33.0), (0.0, -1.5, -33.75), (0.0, -1.0, -34.0)]
(0.0, -1.0, -34.0) : (0.0, -1.0) cnt : 496 value : 1.0
function minimum at x: 0 y : -1 value ; 1.0
starting at 3,4 got : x: 0.0 y : -1.0 value : 1.0 in 496 cycles, using 0.5 steps with random choice, should try choosing steeper gradient and see what happens
我也在嘗試在 matplotlib 子圖上寫軌跡,但需要先弄清楚如何。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.