[英]Get more speed in pandas Dataframe
我編寫了這段代碼來獲取股市數據。獲取數據后,我將其保存在 Mongo 數據庫中,然后從 Mongo 數據庫中取出所需的數據並轉換為 DataFrame,再使用每一行中的數據計算我需要的數值。整個操作大約需要 35 秒。我需要讓這個操作在盡可能短的時間內完成,越快越好。謝謝你的指導。
%%time
from scipy.stats import norm
import numpy as np
import math
import requests
import pandas as pd
import jdatetime
import json
import time
import pymongo
# Wall-clock timer for the whole run; read again at the end of the script.
start = time.time()
client = pymongo.MongoClient()

# Raw market snapshot: use the collection whose name sorts last
# (presumably the most recent one -- confirm the naming scheme).
database = client['raw']
cursor = database.list_collection_names()
cursor.sort()
Raw = database[cursor[-1]]
data = pd.DataFrame(Raw.find())
df = data[["tse_url", "l18", "l30", 'pc', 'pl', 'tno', 'tvol', 'tval', 'py', 'stock']]
# Keep only rows whose 'stock' code is one of the four listed codes.
# The explicit copy makes `option` an independent frame: many columns are
# inserted and filled in place below, which must not operate on a slice of `df`.
option = df.loc[df['stock'].isin(['311', '312', '320', '321'])].copy()

# Order-book rows from the last-sorting 'transaction' collection;
# only rows with row == '1' are kept.
db = client['transaction']
dbCol = db.list_collection_names()
dbCol.sort()
Trans = db[dbCol[-1]]
trans = pd.DataFrame(Trans.find({}, {"tse_url", "row", "buy", 'sell'}))
trans = trans.drop('_id', axis=1)
trans = trans.loc[trans['row'] == '1']

# Open positions and contract sizes from the last-sorting 'binazir' collection.
binazir = client['binazir']
lst = binazir.list_collection_names()
lst.sort()
latest = binazir[lst[-1]]
OC = pd.DataFrame(latest.find({}, {"tse_url", "open_positions", "contract_size"}))
# Create the empty result columns. Each one is inserted at position 3, so the
# names end up in the frame in the reverse of the order listed here.
for _column in (
    'contract', 'blackscholes', 'open', '%t', 'through',
    '%pc', '%pl', 's1', 'b1', 'mature',
    'status', 'bp', 'strike', 'volatility', 'SellBS',
    'BuyBS', 'lever', 'delta', 'deltaLever', 'vega',
    'theta', 'gamma', 'rho', 'margin', 'undif',
):
    option.insert(3, _column, '')
# Copy the 'sell'/'buy' values from `trans` into 's1'/'b1' of the option row
# with the same tse_url. A dict keyed by tse_url replaces the nested scan over
# `trans` for every option row; as in the nested scan, the last `trans` row
# for a given tse_url wins, and rows without a match are left untouched.
quotes_by_url = {
    url: (buy, sell)
    for url, buy, sell in zip(trans['tse_url'], trans['buy'], trans['sell'])
}
for url, idx in zip(option['tse_url'], option.index):
    quote = quotes_by_url.get(url)
    if quote is not None:
        option.at[idx, 's1'] = quote[1]
        option.at[idx, 'b1'] = quote[0]
# Parse each 'l30' description of the form "<prefix+name>-<strike>-<date>" into
# the strike, the days to maturity, and the matching underlying row of `data`.

# Today's Jalali date at midnight; computed once instead of once per row.
today = jdatetime.datetime.strptime(jdatetime.datetime.now().strftime("%Y%m%d"), "%Y%m%d")

# l18 -> (pc, tse_url); replaces a full scan of `data` for every option row.
# As in a sequential scan, the last row with a given l18 wins.
underlying_by_name = {
    name: (close, url)
    for name, close, url in zip(data['l18'], data['pc'], data['tse_url'])
}

for description, idx in zip(option['l30'], option.index):
    parts = description.split('-')
    option.at[idx, 'strike'] = parts[1]
    namad = parts[0][8:]  # name part after the first 8 characters

    # Expand a two-digit year ('00' / '01') to four digits. Only the leading
    # year is prefixed: str.replace() would also rewrite a month or day that
    # happens to be '00' / '01' and produce an unparseable date.
    raw_date = parts[2]
    if raw_date.startswith(('00', '01')):
        date = '14' + raw_date
    else:
        date = raw_date
    if date[4] == '/':
        date = date.replace('/', '')
    mature = (jdatetime.datetime.strptime(date, "%Y%m%d") - today).days
    option.at[idx, 'mature'] = mature

    # Normalise spelling differences between the option description and the
    # underlying's l18 before looking it up.
    x = namad.replace("هموزن", "هم وزن")
    y = x.replace("حافرين", "حآفرين")
    z = y.replace("ص.دارا", "دارا")
    match = underlying_by_name.get(z)
    if match is not None:
        option.at[idx, 'bp'] = match[0]
        option.at[idx, 'StockName'] = z
        option.at[idx, 'StockURL'] = match[1]
# Label every option ITM / OTM / ATM by comparing the base price with the
# strike. Codes '311' and '320' are ITM when the base price is above the
# strike; every other code is ITM when the base price is below it.
for base_price, strike_price, code, idx in zip(
        option['bp'], option['strike'], option['stock'], option.index):
    spot = int(base_price)
    strike_value = int(strike_price)
    if spot == strike_value:
        label = 'ATM'
    else:
        above_is_itm = code in ('311', '320')
        label = 'ITM' if (spot > strike_value) == above_is_itm else 'OTM'
    option.at[idx, 'status'] = label
# 'through': strike plus premium for codes '311'/'320', strike minus premium
# otherwise. The premium is 's1', or 'pl' when 's1' is the string '0'.
# '%t': percentage distance of 'through' from the base price 'bp'.
for last, strike_price, ask, idx, base_price, code in zip(
        option['pl'], option['strike'], option['s1'], option.index,
        option['bp'], option['stock']):
    premium = last if ask == '0' else ask
    if code in ('311', '320'):
        t = int(premium) + int(strike_price)
    else:
        t = int(strike_price) - int(premium)
    option.at[idx, 'through'] = t
    pt = (t - int(base_price)) / int(base_price) * 100
    option.at[idx, '%t'] = round(pt, 2)
# Fill 'open' / 'contract' from the `OC` row with the same tse_url, falling
# back to 0 / 1000 when the url is not present in `OC`.
# The lookup table replaces a nested scan whose `else` branch reset the
# defaults on every non-matching `OC` row, so a real match survived only if it
# happened to be the last row scanned.
positions_by_url = {
    url: (open_positions, contract_size)
    for url, open_positions, contract_size in zip(
        OC['tse_url'], OC["open_positions"], OC["contract_size"])
}
for url, idx in zip(option['tse_url'], option.index):
    open_positions, contract_size = positions_by_url.get(url, (0, 1000))
    option.at[idx, 'open'] = open_positions
    option.at[idx, 'contract'] = contract_size
# Percentage change of 'pl' and of 'pc' relative to 'py', rounded to 2 places.
for last, close, previous, idx in zip(
        option['pl'], option['pc'], option['py'], option.index):
    last_change = (int(last) - int(previous)) / int(previous) * 100
    option.at[idx, '%pl'] = round(last_change, 2)
    close_change = (int(close) - int(previous)) / int(previous) * 100
    option.at[idx, '%pc'] = round(close_change, 2)
# Volatility per underlying: download its price history once, compute
# 100 * ln(close / next row's close) for every row, discard values outside
# (-10, 10), and scale the sample standard deviation of the first 132
# remaining values by sqrt(245).
# The history is processed with whole-column operations instead of a Python
# loop that wrote one cell at a time, and a single HTTP session is reused so
# every request after the first can reuse the open connection.
with requests.Session() as session:
    for b in set(option['StockURL']):
        url = 'http://www.tsetmc.com/tsev2/data/Export-txt.aspx?t=i&a=1&b=0&i=%s' % str(b)
        main_text = session.get(url).text
        # First line is dropped as the header; column 5 is the 'close' field.
        history = pd.DataFrame(
            [line.split(',') for line in main_text.split('\r\n')]
        ).drop(0, axis=0)
        close = history[5].astype('float')
        ln = 100 * np.log(close / close.shift(-1))
        # NaN compares False on both sides, so rows with a missing close or
        # missing next close are discarded here as well.
        ln = ln[(ln > -10) & (ln < 10)]
        volatility = ln.iloc[:132].std(ddof=1) * math.sqrt(245)
        option.loc[option['StockURL'] == b, 'volatility'] = round(volatility, 1)
# Black-Scholes price and Greeks for every option row.
# Changes relative to the earlier revision: the normal density uses math.pi
# instead of 3.14, the never-read t1/t2 values are gone, and each norm.cdf
# value is computed once per row instead of once per use.
r = 0.25  # NOTE(review): presumably the annual risk-free rate -- confirm
q = 0     # NOTE(review): presumably the dividend yield -- confirm
for strike_price, base_price, mature, vol, code, idx, last, ask in zip(
        option['strike'], option['bp'], option['mature'], option['volatility'],
        option['stock'], option.index, option['pl'], option['s1']):
    S = int(base_price)
    K = int(strike_price)
    T = int(mature)  # days to maturity
    sigma = float(vol) / 100
    if T == 0:
        d1 = 0
    else:
        d1 = (math.log(S / K) + ((r - q + (0.5 * (sigma ** 2))) * T / 365)) / (sigma * math.sqrt(T / 365))
    d2 = (d1 - sigma * math.sqrt(T / 365))

    # Standard normal density at d1.
    Nd1 = math.exp(-(d1 ** 2) / 2) / math.sqrt(2 * math.pi)
    # NOTE(review): gamma multiplies by sigma * sqrt(T) where the textbook
    # formula divides, and T is in days here but in years (T / 365) in d1/d2.
    # Kept as written -- confirm the intended formulas before changing.
    option.at[idx, 'gamma'] = round(Nd1 * math.exp(-q * T) / S * sigma * T ** 0.5)
    option.at[idx, 'vega'] = round(S * T ** 0.5 * Nd1)

    if mature == 0 and K >= S:
        # NOTE(review): applied to every code, including the non-'311'/'320'
        # ones that use the put formula below -- confirm.
        option.at[idx, 'blackscholes'] = 0
    elif mature == 0 and K < S:
        option.at[idx, 'blackscholes'] = S - K
    else:
        # Premium used for 'deltaLever': 's1', or 'pl' when 's1' is 0.
        premium = int(last) if int(ask) == 0 else int(ask)
        if code == '311' or code == '320':
            cdf_d1 = norm.cdf(d1)
            cdf_d2 = norm.cdf(d2)
            call = (S * math.exp(-q * T / 365) * cdf_d1 - K * math.exp(-r * T / 365) * cdf_d2)
            deltaCall = cdf_d1
            option.at[idx, 'blackscholes'] = round(call)
            option.at[idx, 'delta'] = round(deltaCall, 4)
            option.at[idx, 'rho'] = round(K * T * math.exp(-r * T) * cdf_d2, 4)
            option.at[idx, 'theta'] = round(1 / 365 * (-(S * sigma * math.exp(-q * T) * Nd1 / 2 * T ** 0.5) - r * K * math.exp(-r * T) * cdf_d2 + q * S * math.exp(-q * T) * cdf_d1), 4)
            # NOTE(review): premium / premium is always 1, so this is
            # S * delta - 1; S * delta / premium may have been intended.
            option.at[idx, 'deltaLever'] = round(S * deltaCall - premium / premium, 2)
        else:
            cdf_neg_d1 = norm.cdf(-1 * d1)
            cdf_neg_d2 = norm.cdf(-1 * d2)
            put = (K * math.exp(-r * T / 365) * cdf_neg_d2 - S * math.exp(-q * T / 365) * cdf_neg_d1)
            deltaPut = norm.cdf(d1) - 1
            option.at[idx, 'blackscholes'] = round(put)
            option.at[idx, 'delta'] = round(deltaPut, 4)
            option.at[idx, 'rho'] = round(-K * T * math.exp(-r * T) * cdf_neg_d2, 4)
            option.at[idx, 'theta'] = round(1 / 365 * (-(S * sigma * math.exp(-q * T) * Nd1 / 2 * T ** 0.5) + r * K * math.exp(-r * T) * cdf_neg_d2 - q * S * math.exp(-q * T) * cdf_neg_d1), 4)
            # NOTE(review): same premium / premium concern as the call branch.
            option.at[idx, 'deltaLever'] = round(S * deltaPut - premium / premium, 2)
def _gap_vs_model(quote, model_price, no_quote_label):
    """Return the percentage gap between a quote and the model price.

    Both inputs are converted with int(). The result is 0 when both are zero,
    `no_quote_label` when only the quote is zero, quote * 100 when only the
    model price is zero, and otherwise the relative difference in percent
    rounded to two places.
    """
    quote = int(quote)
    model_price = int(model_price)
    if quote == 0:
        return 0 if model_price == 0 else no_quote_label
    if model_price == 0:
        return round(quote * 100, 2)
    return round(((quote - model_price) / model_price) * 100, 2)


# 'SellBS' compares 's1' with the Black-Scholes price, 'BuyBS' compares 'b1'.
for ask, model_price, idx in zip(option['s1'], option['blackscholes'], option.index):
    option.at[idx, 'SellBS'] = _gap_vs_model(ask, model_price, "بدون فروشنده")
for bid, model_price, idx in zip(option['b1'], option['blackscholes'], option.index):
    option.at[idx, 'BuyBS'] = _gap_vs_model(bid, model_price, "بدون خریدار")
# 'lever': strike divided by 's1', or by 'pl' when 's1' is zero,
# rounded to two places.
for ask, strike_price, last, idx in zip(
        option['s1'], option['strike'], option['pl'], option.index):
    ratio = int(strike_price) / int(last if int(ask) == 0 else ask)
    option.at[idx, 'lever'] = round(ratio, 2)
# 'margin' and 'undif' per option row. The two code groups differ only in the
# deduction `l`; everything after it is shared.
for strike_price, base_price, code, idx, last, ask, contract in zip(
        option['strike'], option['bp'], option['stock'], option.index,
        option['pl'], option['s1'], option['contract']):
    if code in ('311', '320'):
        l = abs(min(int(base_price) - int(strike_price), 0)) * int(contract)
    else:
        # NOTE(review): min(bp, 0) is 0 for any positive base price, so this
        # reduces to strike * contract -- confirm that is intended.
        l = abs(int(strike_price) - min(int(base_price), 0)) * int(contract)
    I1 = 0.2 * int(base_price) * int(contract) - l
    I2 = 0.1 * int(strike_price) * int(contract)
    # Round the larger requirement up to the next multiple of 100000.
    V1 = (math.floor(max(I1, I2) / 100000) + 1) * 100000
    # Premium is 's1' unless it is the string '0', in which case 'pl' is used.
    premium = ask if ask != '0' else last
    V2 = int(premium) * int(contract)
    M = V1 + V2
    option.at[idx, 'margin'] = round(M)
    option.at[idx, 'undif'] = round(int(strike_price) / int(premium) - int(base_price) - 1, 4) * 100

# Report the elapsed wall-clock time in seconds.
end = time.time()
print(end - start)
我需要將這段代碼的執行時間減少到盡可能短的時間,但目前大約需要 35 秒。
由於使用了大量循環,您的代碼很慢。實際修復所有代碼超出了單個問答的范圍,但我可以告訴您修復的方向:改用向量化操作。例如,直接寫
df['A'] * df['B']
即可一次得到整個結果 Series,而不是使用 for 循環一次填充一個單元格。通過使用向量化操作,您的程序將主要執行高度優化的已編譯庫代碼,而不是緩慢的 Python 代碼。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.