boost.python：pandas dataframe 至 c++

Question

我想用 Boost.Python 在 C++ 中使用带有多级索引列（MultiIndex columns）的 pandas DataFrame。※ 多级索引列的 DataFrame 如下所示：

我把这个多级索引列的 DataFrame 导出成了 CSV 文件。该 CSV 文件在电子表格中打开后如下所示：

我之所以要用这个数据是为了回测。 这是我在 python 中的回测代码，我想将其转换为 c++。

import pandas as pd
import numpy as np
from utils import load_data, load_list_csv, to_int

class No_Strategy():
  """Baseline strategy that turns one date's data into per-code buy/sell orders.

  Usage: construct, call ``set_data`` with the rows for one date, then ``run``.
  Afterwards ``buy_signal``, ``sell_signal``, ``position``, ``cash`` and
  ``pass_st`` hold the outcome.
  """

  # Errors raised while reading a missing or malformed statement cell
  # (e.g. NaN has no ``.replace``, a label is absent, or text is not an integer).
  _BAD_CELL_ERRORS = (AttributeError, KeyError, IndexError, TypeError, ValueError)

  def __init__(self, codes, unit, cash, position):
    """Store the trading universe and the starting account state.

    Args:
      codes: sequence of stock codes; signals and positions are indexed in this order.
      unit: number of shares bought per buy order.
      cash: cash available before this run.
      position: list of current holdings per code; it is updated in place.
    """
    self.codes = codes
    self.unit = unit
    self.cash = cash
    self.buy_signal = [0]*len(codes)   # shares bought per code during run()
    self.sell_signal = [0]*len(codes)  # shares sold per code during run()
    self.valid = 0                     # 1 if the most recently processed code had usable data
    self.position = position
    self.pass_st = 0                   # number of codes that could not be evaluated

  def set_data(self, prev_fs_row, fs_row, indi_row):
    """Attach the data for one date.

    Args:
      prev_fs_row: previous-year financial statements, indexable by stock code.
      fs_row: current-year financial statements, indexable by stock code.
      indi_row: price/indicator data for the date, indexable by stock code.
    """
    self.prev_fs = prev_fs_row
    self.fs = fs_row
    self.indi = indi_row

  @staticmethod
  def _parse_amount(cell):
    """Convert a thousands-separated string such as "1,234" to an int."""
    return int(cell.replace(",", ""))

  @staticmethod
  def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")

  def _strat(self, prev_fs, curr_fs, curr_indi):
    """Decide whether to buy or sell one stock.

    Returns:
      ``(buy, sell)`` flags: ``(1, 0)`` to buy, ``(0, 1)`` to sell, ``(0, 0)``
      to do nothing. When a required statement figure cannot be parsed,
      ``pass_st`` is incremented and ``(0, 0)`` is returned.
    """
    try:
      prev_rev = self._parse_amount(prev_fs['매출액'])
      curr_rev = self._parse_amount(curr_fs['매출액'])
      prev_ni = self._parse_amount(prev_fs['당기순이익'])
      curr_ni = self._parse_amount(curr_fs['당기순이익'])
      curr_asset = self._parse_amount(curr_fs['유동자산'])
      noncurr_asset = self._parse_amount(curr_fs['비유동자산'])
    except self._BAD_CELL_ERRORS:
      self.pass_st += 1
      return 0, 0

    # Fundamental ratios made available for custom rules below. A zero
    # denominator yields NaN instead of aborting the whole back-test.
    rev_growth = self._ratio(curr_rev - prev_rev, prev_rev)
    ni_growth = self._ratio(curr_ni - prev_ni, prev_ni)
    curr_asset_rat = self._ratio(curr_asset, noncurr_asset)

    #### this is the buy strategy! You can change the below ####
    if (curr_indi.golden_cross) or (curr_indi.rsi_k < 0.65):
      return 1, 0
    #### ************************************************** ####

    #### this is the sell strategy! You can change the below ####
    if (curr_indi.dead_cross):
      return 0, 1
    #### ************************************************** ####

    return 0, 0

  def run(self):
    """Evaluate every code once and apply the resulting orders.

    A code whose first statement cell (previous or current year) is unusable
    is skipped and counted exactly once in ``pass_st``. A buy adds ``unit``
    shares if cash covers ``unit * close``; a sell keeps ``int(position / 2)``
    shares and sells the rest.
    """
    for i, code in enumerate(self.codes):
      self.valid = 0
      prev_fs = self.prev_fs[code]
      curr_fs = self.fs[code]
      curr_indi = self.indi[code]

      # Probe the first cell of both statements; a missing value (e.g. NaN)
      # has no string ``replace`` and marks the code as unusable.
      try:
        prev_fs.iloc[0].replace(",", "")
        curr_fs.iloc[0].replace(",", "")
      except self._BAD_CELL_ERRORS:
        self.pass_st += 1
        continue

      self.valid = 1
      buy, sell = self._strat(prev_fs, curr_fs, curr_indi)

      price = curr_indi['close']
      if buy:
        if self.cash >= self.unit * price:
          self.buy_signal[i] = self.unit
          self.position[i] += self.unit
          self.cash -= price * self.unit
      elif sell:
        if self.position[i] > 0:
          # Keep the rounded-down half, sell the remainder (1 -> sell 1, 4 -> sell 2).
          kept = int(self.position[i] / 2)
          sell_num = self.position[i] - kept
          self.sell_signal[i] = sell_num
          self.position[i] = kept
          self.cash += price * sell_num


#@title
class Broker():
  """Back-test driver: feeds each date's data to a strategy and records the results."""

  # First calendar year that is excluded from both the simulation and the evaluation.
  CUTOFF_YEAR = 2021

  def __init__(self, codes):
    """Create a broker for the given stock codes with the default starting cash."""
    self.cash = 200000000    # starting cash (200 million)
    self.cash_df = None      # cash per date; created in set_time

    self.position = [0]*len(codes)  # current holdings per code
    self.position_df = None         # holdings per date; created in set_time

    self.buy_signal = None   # shares bought per date and code; created in set_time
    self.sell_signal = None  # shares sold per date and code; created in set_time
    self.codes = codes       # e.g. 012934, 3281, ...
    self.unit = 1            # number of shares per order

    self.pass_st = 0         # accumulated count of codes the strategy could not evaluate

  def set_strat(self, strategy):
    """Register the strategy class; it is instantiated once per date in run()."""
    self.strategy = strategy

  def set_time(self, time_index):
    """Allocate empty result frames indexed by ``time_index`` (a pandas Index of dates)."""
    self.buy_signal = pd.DataFrame(columns=self.codes, index=time_index)
    self.sell_signal = pd.DataFrame(columns=self.codes, index=time_index)
    self.position_df = pd.DataFrame(columns=self.codes, index=time_index)
    self.cash_df = pd.DataFrame(columns=['cash'], index=time_index)
    self.time_index = time_index

  def set_data(self, fs, indi, price):
    """Attach the input data.

    Args:
      fs: financial statements; looked up by year in run().
      indi: indicator data; looked up by date in run().
      price: close prices, rows are dates and columns are stock codes.
    """
    self.fs = fs
    self.indi = indi
    self.price = price

  def update_data(self, strategy, date):
    """Copy the state of a finished strategy run into the per-date records."""
    self.cash = strategy.cash
    self.cash_df.loc[date] = strategy.cash

    self.position = strategy.position
    self.position_df.loc[date] = strategy.position

    self.buy_signal.loc[date] = strategy.buy_signal
    self.sell_signal.loc[date] = strategy.sell_signal

    self.pass_st += strategy.pass_st

  def run(self):
    """Run the strategy on every date until the first date in CUTOFF_YEAR."""
    for date in self.time_index:  # original author's note: likely needs revision
      if date.year == self.CUTOFF_YEAR:
        break

      prev_fs_row = self.fs.loc[date.year - 1]  # e.g. 2014
      fs_row = self.fs.loc[date.year]           # e.g. 2015
      indi_row = self.indi.loc[date]

      strategy = self.strategy(self.codes, self.unit, self.cash, self.position)
      strategy.set_data(prev_fs_row, fs_row, indi_row)
      strategy.run()
      self.update_data(strategy, date)

  def performance(self):
    """Print and store the total return and a Sharpe-style ratio.

    Only rows dated before CUTOFF_YEAR are evaluated. On success the results
    are stored in ``self.return_mean`` and ``self.sharp``. When nothing was
    bought, "no buy" is printed and nothing is stored.
    """
    cutoff = str(self.CUTOFF_YEAR)
    cash_df = self.cash_df[self.cash_df.index < cutoff]
    position_df = self.position_df[self.position_df.index < cutoff]
    price = self.price[self.price.index < cutoff]
    buy_signal = self.buy_signal[self.buy_signal.index < cutoff]
    sell_signal = self.sell_signal[self.sell_signal.index < cutoff]

    last_price = price.iloc[-1]
    total_remain_num = self.position  # holdings left after the last simulated date

    total_buy = (price * buy_signal).sum(axis=1).sum()
    total_sell = (price * sell_signal).sum(axis=1).sum()
    total_remain = (last_price * total_remain_num).sum()
    print(f'remain 개수: {total_remain_num}, total_remain: {total_remain} total_buy: {total_buy}, total_sell={total_sell}')

    profit = total_sell + total_remain - total_buy
    # Explicit check: numpy floats yield inf/nan on division by zero instead of raising.
    if total_buy == 0:
      print("no buy")
      return
    return_mean = profit / total_buy

    # Account value per date: cash plus the market value of all holdings.
    accum_df = (cash_df['cash'] + ((price.fillna(0) * position_df).sum(axis=1))).to_frame()
    prev_accum = accum_df.shift(1)
    daily_return_df = (accum_df - prev_accum) / prev_accum  # simple day-over-day return
    # NOTE(review): deviations are taken from the total return, not from the mean
    # daily return — confirm this is the intended definition.
    SSE = ((daily_return_df - return_mean)**2).sum().item()
    std = np.sqrt(SSE/(accum_df.shape[0]-1))  # sqrt(sum((x - x_bar)^2) / (n - 1))

    sharp = return_mean / std

    self.return_mean = return_mean
    self.sharp = sharp

    print(f'return_mean: {return_mean}, sharp: {sharp}')

# Locations of the input CSV files, all relative to GDRIVE_DATA_PATH.
code_path, fs_path, indi_path, price_path = (
  GDRIVE_DATA_PATH + file_name
  for file_name in ('codes.csv', 'fs_total.csv', 'indi_total.csv', 'prices.csv')
)

# Load the three data sets used by the back-test.
fs_total = load_data("fs_total.csv")      # financial statements
indi_total = load_data("indi_total.csv")  # stock price and indicators (golden cross, RSI, etc.)
prices = load_data("prices.csv")          # close prices: rows are dates, columns are stock codes
time_index = indi_total.index             # dates covered by the indicator data

# NOTE(review): `codes` is not defined in this snippet — presumably loaded
# elsewhere (e.g. from code_path); confirm before running.
broker = Broker(codes)
broker.set_strat(No_Strategy)
broker.set_time(time_index)
broker.set_data(fs_total, indi_total, prices)

# Simulate, then report the results.
broker.run()
broker.performance()

我想在不改变代码流程的前提下把它移植到 C++。但是我不知道如何在 C++ 中读取多级索引列的 DataFrame，并把它的行数据传给 No_Strategy，由它来决定是否买卖股票。

※ 我之前提过一个类似的问题，也得到了很有帮助的回答，但那个回答对我来说太复杂了，所以我补充了详细信息后再次提问。

Answer 1

可以看看 https://github.com/hosseinmoein/DataFrame 。它是一个 C++ 库，以更快的实现提供了 Pandas 大约 95% 的功能。

boost.python：pandas dataframe 至 c++

问题描述

1 个解决方案

解决方案1
0 2022-11-22 18:43:53

boost.python：pandas dataframe 至 c++

问题描述

1 个解决方案

解决方案1 0 2022-11-22 18:43:53

解决方案1
0 2022-11-22 18:43:53