I'm developing an interactive dashboard using Plotly Dash, which takes an Excel workbook as input, formats the data into a pandas dataframe, and displays it as a bar graph.
It works well with a single workbook, but when I add a variable to allow multiple workbooks to be loaded, concatenated into one long dataframe, and visualized, I run into a persistence issue: the data is kept after the browser is refreshed, even though storage_type
is set to 'memory'
per the documentation.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
dfmeans = []
app.layout = html.Div([ # this code section taken from Dash docs https://dash.plotly.com/dash-core-components/upload
dcc.Store(id='stored-data', storage_type='memory'),
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# Allow multiple files to be uploaded
multiple=True
I suspect this is because I have declared the list variable dfmeans = []
outside of the main function, but that's the only place I have been able to get it to work. When I place it inside the parse_contents()
function, the data is replaced each time I add a new workbook.
Has anyone out there successfully implemented the Dash Upload component dcc.Upload
taking multiple workbooks/Excel files as input? The documentation on uploading more than one file is really sparse from what I can find. Full code here -
import base64
import datetime
import io
import re
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import plotly.express as px
import pandas as pd
from read_workbook import *
import pdb
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# suppress_callback_exceptions must be passed to the Dash constructor (or set
# via app.config); the original bare assignment
# `suppress_callback_exceptions=True` created an unused module variable and
# had no effect on the app.
app = dash.Dash(__name__,
                external_stylesheets=external_stylesheets,
                suppress_callback_exceptions=True)
# Module-level accumulator of per-file mean DataFrames.
# NOTE(review): global state like this lives until the server process dies,
# which is exactly why uploaded data survives a browser refresh here.
dfmeans = []
# Top-level page layout, adapted from the Dash upload example.
app.layout = html.Div([ # this code section taken from Dash docs https://dash.plotly.com/dash-core-components/upload
# Client-side store; storage_type='memory' is documented to clear on refresh.
# NOTE(review): parse_contents() below also emits a dcc.Store with this same
# id for every uploaded file, so duplicate component ids can occur -- confirm
# against the Dash duplicate-id rules.
dcc.Store(id='stored-data', storage_type='memory'),
# Drag-and-drop / file-picker upload area.
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# Allow multiple files to be uploaded
multiple=True
),
# Filled by make_graphs() with the bar chart.
html.Div(id='output-div'),
# Filled by update_output() with one preview Div per uploaded file.
html.Div(id='output-datatable'),
])
def parse_contents(contents, filename, date, dfmeans_acc=None):
    """Decode one uploaded Excel workbook, compute per-shift means, and
    return an html.Div with a preview table of the accumulated data.

    Parameters
    ----------
    contents : str
        Base64 data-URL string supplied by dcc.Upload.
    filename : str
        Name of the uploaded file (shown as the preview header).
    date : int
        Upload timestamp from dcc.Upload (currently unused in the output).
    dfmeans_acc : list | None
        Accumulator list of per-file mean DataFrames. Defaults to the
        module-level ``dfmeans`` for backward compatibility; pass a list
        rebuilt from a dcc.Store instead to avoid module-global state
        that survives page refreshes.
    """
    if dfmeans_acc is None:
        # Backward-compatible fallback to the (persistent) module global.
        dfmeans_acc = dfmeans
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    try:
        workbook_xl = pd.ExcelFile(io.BytesIO(decoded))

        # Aggregates all months' data into a single DataFrame.
        def get_all_months(xl_file):
            months = ['July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
                      'Jan', 'Feb', 'Mar', 'Apr', 'May', 'June']
            # xl_file is already a pd.ExcelFile; the original wrapped it in
            # pd.ExcelFile a second time, which is redundant at best.
            return pd.concat(get_month_dataframe(xl_file, month)
                             for month in months)

        df = get_all_months(workbook_xl)
        # Episode values arrive as strings; convert before averaging,
        # then take the mean per (Date, variable) shift.
        df['value'] = df['value'].astype(float)
        dfmean = (df.groupby(['Date', 'variable'], sort=False)['value']
                    .mean().round(2).reset_index())
        dfmeans_acc.append(dfmean)
        dfmean = pd.concat(dfmeans_acc)
    except Exception as e:
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])
    return html.Div([
        html.H5(filename),
        dash_table.DataTable(
            data=dfmean.to_dict('records'),
            columns=[{'name': i, 'id': i} for i in dfmean.columns],
            page_size=15
        ),
        # NOTE(review): one dcc.Store with id='stored-data' is emitted per
        # uploaded file (and the layout declares the same id), which risks
        # duplicate component ids -- prefer a single store outside this
        # function, as discussed in the answer below.
        dcc.Store(id='stored-data', data=dfmean.to_dict('records')),
        html.Hr(),  # horizontal line
        # For debugging, display the raw contents provided by the web browser
        html.Div('Raw Content'),
        html.Pre(contents[0:200] + '...', style={
            'whiteSpace': 'pre-wrap',
            'wordBreak': 'break-all'
        })
    ])
@app.callback(Output('output-datatable', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))
def update_output(list_of_contents, list_of_names, list_of_dates):
    """Build one preview Div per uploaded file; None before any upload."""
    if list_of_contents is None:
        return None  # matches the original's implicit None before uploads
    uploads = zip(list_of_contents, list_of_names, list_of_dates)
    return [parse_contents(contents, name, stamp)
            for contents, name, stamp in uploads]
@app.callback(Output('output-div', 'children'),
              Input('stored-data', 'data'))
def make_graphs(data):
    """Render a grouped bar chart of the stored mean values.

    Returns None (clearing the output div) when no data is stored yet.
    """
    df_agg = pd.DataFrame(data)
    # df_agg['Date'] = pd.to_datetime(df_agg['Date'])
    if df_agg.empty:
        # Fixed typo in the original message ("Dataframe epmty").
        print("Dataframe empty")
        return None
    # When data_frame is supplied, px.bar expects column NAMES, not Series.
    bar_fig = px.bar(df_agg, x='Date', y='value',
                     color='variable', barmode='group')
    return dcc.Graph(figure=bar_fig)
# Script entry point: start the Dash development server with hot reload.
if __name__ == '__main__':
    app.run_server(debug=True)
Defining dfmeans
outside the scope of callbacks will definitely make your data persistent until you kill the server because it is treated as a global variable. According to Dash documentation :
One of the core Dash principles explained in the Getting Started Guide on Callbacks is that Dash Callbacks must never modify variables outside of their scope. It is not safe to modify any global variables. This chapter explains why and provides some alternative patterns for sharing state between callbacks.
One alternative would be to create a global store component to store dfmeans
and pass its state to update_output
such that it gets appended every time a new file is uploaded:
@app.callback(Output('output-datatable', 'children'),
              Output('global-stored-data', 'data'),  # comma was missing here
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'),
              State('global-stored-data', 'data'))
def update_output(list_of_contents, list_of_names, list_of_dates,
                  global_stored_data):
    """Parse uploads, accumulate their mean frames, and refresh the store.

    ``global_stored_data`` is whatever was last written to the
    'global-stored-data' dcc.Store; with storage_type='memory' it is
    reset on page refresh, which is the desired behavior.
    """
    # Rebuild the accumulator from the store instead of a module global.
    # The store's initial data may be None, so guard before iterating.
    dfmeans = [pd.DataFrame(data) for data in (global_stored_data or [])]
    if list_of_contents is not None:
        children = [
            parse_contents(c, n, d, dfmeans) for c, n, d in
            zip(list_of_contents, list_of_names, list_of_dates)]
        global_stored_data = [df.to_dict('records') for df in dfmeans]
        return children, global_stored_data
    # Two outputs -> return one no_update per output; the original's bare
    # `return dash.no_update` does not match a multi-output callback.
    return dash.no_update, dash.no_update
The global store should be created with storage_type='memory'
, so that its content is not persistent when you refresh the page.
That being said, I noticed that children
, the output of update_output
, is a list of html.Div()
, each returned by parse_contents
. However, part of the content of each Div
is dcc.Store(id='stored-data', data=dfmean.to_dict('records'))
, so multiple instances of dcc.Store
with the same id stored-data
are output simultaneously, doesn't that generate an error? Unless I misunderstood your layout, I think you have only one graph (with multiple data file contents overlayed in it), so I think you should revise that part of code to use only one dcc.Store
for the concatenated data, as suggested above.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.