[英]How do I do a time series analysis as part of running a function in Python 3.7?
我有這個:
formulas_count_stats.py:
import pandas as pd
from df_count_stats import df, df1
# Self-assignments: each name imported above is rebound to itself, so these
# two lines have no effect.
df = df
df1 = df1
# Wraps a DataFrame loaded from a ';'-delimited CSV and exposes thin
# exploratory-data-analysis helpers plus one time-series plot method.
# NOTE(review): indentation was lost in this listing; the method bodies below
# are shown flush-left exactly as they appear in the source.
class Data_load_compare_0:
# `df` here is whatever pd.read_csv accepts as its first argument (the CSV
# source), not a DataFrame; the parsed frame is stored on self.df.
def __init__(self, df):
self.df = pd.read_csv(df, delimiter=';')
'''
Data information section from df = basic stats
'''
# Return the column labels of self.df.
def get_EDA_columns(self):
return self.df.columns
# Return the result of self.df.info().
# NOTE(review): DataFrame.info() prints its summary and returns None, so
# callers presumably receive None here -- confirm against the pandas docs.
def get_EDA_info(self):
return self.df.info()
# Return self.df.describe().
def get_EDA_describe(self):
return self.df.describe()
# Return self.df.shape.
def get_EDA_shape(self):
return self.df.shape
# Return self.df.value_counts().
def get_EDA_value_counts(self):
return self.df.value_counts()
# Return self.df.isnull().
def get_EDA_isnull(self):
return self.df.isnull()
# Return self.df.dtypes.
def get_EDA_dtypes(self):
return self.df.dtypes
# Return self.df.isna().
def get_EDA_isna(self):
return self.df.isna()
# Return self.df.nunique().
def get_EDA_nunique(self):
return self.df.nunique()
# Despite the "sort" in the name, this filters: rows where col1 == 'X'.
def get_EDA_sort_dipl(self):
return self.df.query("col1 == 'X'")
# Despite the "sort" in the name, this filters: rows where col1 == 'Y'.
def get_EDA_sort_bach(self):
return self.df.query("col1 == 'Y'")
# Group by col2, col1 and col3 and apply the 'count' aggregation.
def get_EDA_sort_by_line(self):
return self.df.groupby(['col2', 'col1', 'col3']).agg(['count'])# groupby(['User Name', 'col2'])['col1'].size().reset_index(name='counts')
'''
Time series
'''
# This import sits between method definitions rather than at the top of the file.
import matplotlib.pyplot as plt
# Parse 'Logon Time', derive calendar columns and plot a bar chart grouped by
# year and month.
# Every `df` below is the module-level name imported from df_count_stats, not
# self.df, so the frame loaded in __init__ is never used by this method.
def get_time_series(self):
df['Logon Time'] = pd.to_datetime(df['Logon Time'], errors='coerce')
# `.dt` is read from the index, not from the parsed 'Logon Time' column. This
# is the line the question's traceback reports: the index is a RangeIndex,
# which has no `.dt` attribute.
df['Year'] = df.index.dt.year
df['month'] = df.index.dt.month
# NOTE(review): `df.inde` looks like a typo for `df.index` -- never reached
# because the 'Year' assignment above raises first.
df['day'] = df.inde.dt.day
df['hour'] = df.index.dt.hour
df['week'] = df.index.dt.week
# Copies the 'User Name' values into a new 'count' column.
df['count'] = df['User Name']
return df.groupby([df['Logon Time'].dt.year, df['Logon Time'].dt.month]).sum().plot.bar()
# NOTE(review): if this line belongs to get_time_series it is unreachable,
# since it follows the return; the original indentation is not recoverable.
plt.show()
...並從 main_count_stats.py 運行函數(從 df_count_stats.py 加載數據):
from df_count_stats import df_load, df1_load
from formulas_count_stats import Data_load_compare_0, Data_load_compare_1
# Build one analysis object per data source imported from df_count_stats.
myData = Data_load_compare_0(df_load)
# NOTE(review): Data_load_compare_1 is not defined in the
# formulas_count_stats.py excerpt shown in the question.
myData1 = Data_load_compare_1(df1_load)
# Call each EDA helper on the first data set and keep the result.
EDA_stats_00_0 = myData.get_EDA_columns()
EDA_stats_01_0 = myData.get_EDA_nunique()
EDA_stats_02_0 = myData.get_EDA_shape()
EDA_stats_03_0 = myData.get_EDA_info()
EDA_stats_04_0 = myData.get_EDA_isna()
EDA_stats_05_0 = myData.get_EDA_isnull()
EDA_stats_06_0 = myData.get_EDA_describe()
EDA_stats_07_0 = myData.get_EDA_dtypes()
EDA_stats_08_0 = myData.get_EDA_sort_bach()
EDA_stats_09_0 = myData.get_EDA_sort_dipl()
EDA_stats_10_0 = myData.get_EDA_sort_by_line()
# This is the call that raises the AttributeError in the question's traceback.
EDA_stats_11_0 = myData.get_time_series()
我收到此錯誤:
Traceback (most recent call last):
File "C:/.../.../main_count_stats.py", line 25, in <module>
EDA_stats_11_0 = myData.get_time_series()
File "C:\...\...\...\formulas_count_stats.py", line 59, in get_time_series
df['Year'] = df.index.dt.year
AttributeError: 'RangeIndex' object has no attribute 'dt'
我嘗試把簡單的時間序列分析整合到 formulas_count_stats.py 中,這個檔案的其他功能都運作正常,但這一部分顯然沒有成功。我確實已將索引轉換為“to_datetime”格式。
我該如何解決這個問題?
或者您可以將索引轉換為 Series 並使用 .dt:
def get_time_series(self):
    """Add calendar columns derived from 'Logon Time' and plot monthly sums.

    Parses ``self.df['Logon Time']`` as datetimes (unparseable values become
    NaT), stores the year, month, day, hour and ISO week of each timestamp in
    the 'Year', 'month', 'day', 'hour' and 'week' columns, copies 'User Name'
    into 'count', and draws a bar chart of the numeric columns summed per
    (year, month) of 'Logon Time'.

    Returns:
        The object returned by ``DataFrame.plot.bar()`` for the grouped sums.

    Raises:
        KeyError: if ``self.df`` has no 'Logon Time' or 'User Name' column.
    """
    # Use the instance's frame for every step instead of mixing it with a
    # module-level ``df``.
    frame = self.df
    frame['Logon Time'] = pd.to_datetime(frame['Logon Time'], errors='coerce')

    # The timestamps live in the 'Logon Time' column, not in the index, so the
    # datetime accessor has to be taken from that column. Reading the fields
    # directly (rather than via strftime/split/astype(int)) also tolerates the
    # NaT values that errors='coerce' can produce.
    stamps = frame['Logon Time'].dt
    frame['Year'] = stamps.year
    frame['month'] = stamps.month
    frame['day'] = stamps.day
    frame['hour'] = stamps.hour
    # ISO week number; replaces the deprecated ``.dt.week`` accessor.
    frame['week'] = stamps.isocalendar().week
    frame['count'] = frame['User Name']

    # numeric_only=True keeps the datetime and text columns out of the sum;
    # summing a datetime column raises on current pandas versions.
    # NOTE(review): this sums the calendar columns per month; if the goal is
    # the number of logons per month, a count of 'User Name' is probably what
    # is wanted -- confirm with the author.
    monthly = frame.groupby([stamps.year, stamps.month]).sum(numeric_only=True)
    return monthly.plot.bar()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.