简体   繁体   中英

How can I make subplots of the columns of a Pandas dataframe in one window inside a for-loop?

* Please help, it's very important: Why is it not possible to get subplots of the columns of a Pandas dataframe by using a heatmap inside a for-loop?

I am trying to create subplots of the columns of a pandas DataFrame inside a for-loop. I plot the result for every cycle (each cycle is 480 values), and I want the 3 subplots belonging to A, B and C side by side in one window. I've found only one answer here, which I'm afraid does not cover my case: @euri10 answered by using flat.

My scripts are following:

# Import and call the needed libraries
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt


'''
Take a list and create the formatted matrix
'''
def mkdf(ListOf480Numbers):
    """Cut a flat sequence into 8 sections of 6 lines, in zig-zag order.

    The input is split into 8 sections with ``np.array_split`` and every
    section is split again into 6 lines (10 cells per line when 480 values
    are passed in).  Within each section the lines at positions 0, 2 and 4
    are reversed, while the lines at positions 1, 3 and 5 keep their order.

    Returns a list with 8 entries (one per section); each entry is a list
    of the 6 reordered lines of that section.
    """
    sections = np.array_split(ListOf480Numbers, 8)
    zigzag_sections = []
    for section in sections:
        rows = np.array_split(section, 6)
        # Even positions run right-to-left, odd positions left-to-right.
        zigzag_sections.append(
            [row[::-1] if position % 2 == 0 else row
             for position, row in enumerate(rows)]
        )
    return zigzag_sections

'''
Print the matrix with the required format
'''
def print_df(fixMatrix):
    """Assemble the sections produced by ``mkdf`` into one DataFrame.

    Sections are placed side by side in pairs and the pairs are stacked
    vertically, 6 rows per pair, in this top-to-bottom order:

        section 4 | section 7
        section 5 | section 6
        section 1 | section 2
        section 0 | section 3

    Returns a ``pandas.DataFrame`` with 24 rows.
    """
    # (left section, right section) for every band of 6 rows, top to bottom.
    section_pairs = ((4, 7), (5, 6), (1, 2), (0, 3))
    rows = []
    for left, right in section_pairs:
        rows.extend(
            [*fixMatrix[left][line], *fixMatrix[right][line]]
            for line in range(6)
        )
    return pd.DataFrame(rows)

'''
Normalizing Formula
'''

def normalize(value, min_value, max_value, min_norm, max_norm):
    """Linearly rescale ``value`` from one range onto another.

    ``min_value`` is mapped to ``min_norm`` and ``max_value`` to ``max_norm``;
    everything else is interpolated (or extrapolated) linearly.  No check is
    made for ``max_value == min_value``.
    """
    # Position of value inside the source range (0 at min_value, 1 at max_value).
    fraction = (value - min_value) / (max_value - min_value)
    target_span = max_norm - min_norm
    return target_span * fraction + min_norm

'''
Split data in three different lists A, B and C
'''

# Raw string literal: '\m' is not a valid escape sequence, so the non-raw form
# only produced the intended path by accident and triggers an "invalid escape
# sequence" warning on recent Python versions.  The path itself is unchanged.
dft = pd.read_csv(r'D:\me4.TXT', header=None)
# The file is consumed in groups of 4 consecutive rows: the row at position
# 0 (mod 4) is used as the id, the rows at positions 1, 2 and 3 (mod 4) as the
# A, B and C values.
# NOTE(review): assumes the row count is a multiple of 4, otherwise the four
# slices have different lengths - confirm against the input file.
id_set = dft[dft.index % 4 == 0].astype('int').values
A = dft[dft.index % 4 == 1].values
B = dft[dft.index % 4 == 2].values
C = dft[dft.index % 4 == 3].values
# Only the first CSV column of every slice is used.
data = {'A': A[:,0], 'B': B[:,0], 'C': C[:,0]}
#df contains all the data
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])


'''
Data generation phase

'''

#next iteration create all plots, change the number of cycles
# Number of complete 480-row cycles contained in df.
cycles = int(len(df)/480)
print(cycles)
# Iterating over the DataFrame yields its column labels ('A', 'B', 'C').
# Everything below, including the 3-panel figure, runs once per column.
for i in df:
    # One output directory per column; the bare except silently ignores every
    # error raised by os.mkdir (not only "directory already exists").
    try:
        os.mkdir(i)
    except:
        pass
    # Minimum / maximum over the whole column (all cycles), plus the default
    # normalised range used for the single-column heatmap.
    min_val = df[i].min()
    min_nor = -1
    max_val = df[i].max()
    max_nor = 1
    for cycle in range(1):             #iterate through all cycles: replace range(1) by ====> range(int(len(df)/480))
        # With range(1) only cycle 0 is processed.
        count =  '{:04}'.format(cycle)
        j = cycle * 480
        # Rows j .. j+479 of the current column, rearranged into the matrix layout.
        ordered_data = mkdf(df.iloc[j:j+480][i])
        csv = print_df(ordered_data)
        #Write a .csv file containing the matrix of the current column, named after the cycle number
        csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)            
        if 'C' in i:
            min_nor = -40
            max_nor = 150
            #Applying normalization for C between [-40,+150]
            # NOTE(review): df['C'].iloc[j:j+480] is already a Series, so the
            # trailing [i] is a label lookup on its index rather than a column
            # selection - verify this is what is intended.
            new_value3 = normalize(df['C'].iloc[j:j+480][i].values, min_val, max_val, -40, 150)
            n_cbar_kws = {"ticks":[-40,150,-20,0,25,50,75,100,125]}
            df3 = print_df(mkdf(new_value3))
        else:
            #Applying normalization for A,B between    [-1,+1]
            # NOTE(review): both A and B are normalised here with min_val/max_val
            # of the current column i, and the trailing [i] is again a label
            # lookup on a Series - verify.
            new_value1 = normalize(df['A'].iloc[j:j+480][i].values, min_val, max_val, -1, 1)
            new_value2 = normalize(df['B'].iloc[j:j+480][i].values, min_val, max_val, -1, 1)
            n_cbar_kws = {"ticks":[-1.0,-0.75,-0.50,-0.25,0.00,0.25,0.50,0.75,1.0]}
        # These two lines sit outside the if/else, so they run for every column
        # although new_value1 / new_value2 are only assigned in the else branch.
        df1 = print_df(mkdf(new_value1))
        df2 = print_df(mkdf(new_value2))    

        #Plotting parameters by using HeatMap
        plt.figure()
        # NOTE(review): this passes the whole DataFrame df (all columns, all rows)
        # to the heatmap, not the rearranged matrix of the current column.
        sns.heatmap(df, vmin=min_nor, vmax=max_nor, cmap ='coolwarm', cbar_kws=n_cbar_kws)                             
        plt.title(i, fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        #Save a .PNG image of the heatmap of the current column, named after the cycle number
        plt.savefig(f'{i}/{i}{count}.png')  



        #plotting all columns ['A','B','C'] in-one-window side by side
        # NOTE(review): this section is still inside the loop over columns, so a
        # 3-panel figure is built for every column. df3 is only assigned when
        # 'C' in i, so it has not been assigned yet while the first columns are
        # being processed.


        fig, axes = plt.subplots(nrows=1, ncols=3 , figsize=(20,10))

        # Left panel: A
        plt.subplot(131)
        sns.heatmap(df1, vmin=-1, vmax=1, cmap ="coolwarm", linewidths=.75 , linecolor='black', cbar=True , cbar_kws={"ticks":[-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]})
        # fig.axes[-1] is the most recently added axes of the figure; the label is
        # set on it (presumably the colour bar just created by heatmap - verify).
        fig.axes[-1].set_ylabel('[MPa]', size=20) #cbar_kws={'label': 'Celsius'}
        plt.title('A', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')

        # Middle panel: B
        plt.subplot(132)
        sns.heatmap(df2, vmin=-1, vmax=1, cmap ="coolwarm", cbar=True , cbar_kws={"ticks":[-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]})
        fig.axes[-1].set_ylabel('[Mpa]', size=20) #cbar_kws={'label': 'Celsius'}
        #sns.despine(left=True)
        plt.title('B', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')

        # Right panel: C
        plt.subplot(133)
        sns.heatmap(df3, vmin=-40, vmax=150, cmap ="coolwarm" , cbar=True , cbar_kws={"ticks":[-40,150,-20,0,25,50,75,100,125]}) 
        fig.axes[-1].set_ylabel('[°C]', size=20) #cbar_kws={'label': 'Celsius'}
        #sns.despine(left=True)
        plt.title('C', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')


        plt.suptitle(f'Analysis of data in cycle Nr.: {count}', color='yellow', backgroundcolor='black', fontsize=48, fontweight='bold')
        plt.subplots_adjust(top=0.7, bottom=0.3, left=0.05, right=0.95, hspace=0.2, wspace=0.2)
        #plt.subplot_tool()
        # The combined figure is written into the directory of the current column,
        # with the column name repeated twice in the file name.
        plt.savefig(f'{i}/{i}{i}{count}.png') 
        plt.show()

So far I couldn't get the proper output, because in each cycle every column is plotted 3 times: e.g. it plots 'A' on the left and then plots 'A' again under the titles 'B' and 'C' in the middle and on the right of the same window. Then it plots 'B' 3 times instead of once (it should appear only in the middle), and in the end it plots 'C' 3 times instead of once (it should appear only on the right side, but it is also put in the middle and on the left)!

The target is to get the subplots of all 3 columns A, B & C in one window for each cycle (i.e. for every block of 480 values) in the main for-loop!

1st cycle : 0000 -----> subplots of A,B,C ----> Store it as 0000.png

2nd cycle : 0001 -----> subplots of A,B,C ----> Store it as 0001.png ...

The problem is the usage of df inside the for-loop: it passes the values of A or B or C 3 times, while it should pass the values belonging to each column once, respectively. I provide a picture of the unsuccessful output here so that you can see clearly where exactly the problem is.

my desired output is below:

图片

I also provide sample text file of dataset for 3 cycles: dataset

So after looking at your code and your requirements, I think I know what the problem is. Your for loops are in the wrong order. You want a new figure for each cycle, containing each of 'A', 'B' and 'C' as subplots.

This means your outer loop should go over the cycles and your inner loop over i, whereas your indentation and the order of your loops make you try to plot all 'A', 'B', 'C' subplots already on your first pass through i (i='A', cycle=1), and not after your first pass through the first cycle with all i (i='A','B','C', cycle=1).

This is also why you get the problem (as mentioned in your comment on this answer) of df3 not being defined. The definition of df3 is in an if block checking if 'C' in i; on your first pass through the loop this condition is not met and therefore df3 is not defined, but you are still trying to plot it!

Also you got the same problem as in your other question with the NaN/inf values again.

Rearranging the for loops and the indentation and cleaning up the NaN/inf values gets you the following code:

#...
#df contains all the data
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])
# Clean the data: both +inf and -inf become NaN, then every NaN becomes 0, so
# that min()/max() and the heatmaps below only see finite values.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.fillna(0)

# ---------------------------------------------------------------------------
# Data generation phase
# ---------------------------------------------------------------------------

# Plot settings per column, in left-to-right panel order: normalised range,
# colour-bar ticks, colour-bar label and extra heatmap keyword arguments.
unit_ticks = [-1.0, -0.75, -0.5, -0.25, 0.00, 0.25, 0.5, 0.75, 1.0]
panels = {
    'A': {'vmin': -1, 'vmax': 1, 'ticks': unit_ticks, 'label': '[MPa]',
          'extra': {'linewidths': .75, 'linecolor': 'black'}},
    'B': {'vmin': -1, 'vmax': 1, 'ticks': unit_ticks, 'label': '[MPa]',
          'extra': {}},
    'C': {'vmin': -40, 'vmax': 150,
          'ticks': [-40, -20, 0, 25, 50, 75, 100, 125, 150], 'label': '[°C]',
          'extra': {}},
}

# Only complete cycles of 480 rows are processed.
cycles = len(df) // 480
print(cycles)
for cycle in range(cycles):
    count = '{:04}'.format(cycle)
    j = cycle * 480

    # Normalised matrix of every column for this cycle, keyed by column name.
    heatmaps = {}
    for i in df:
        # One output directory per column; an already existing one is reused.
        os.makedirs(i, exist_ok=True)

        # Each column is normalised with its OWN global min/max.  Previously A
        # was recomputed while the loop was at column B and therefore ended up
        # scaled with B's min/max.
        min_val = df[i].min()
        max_val = df[i].max()
        window = df[i].iloc[j:j+480]

        # Write a .csv file with the raw matrix of this column and cycle.
        csv = print_df(mkdf(window))
        csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)

        spec = panels[i]
        scaled = normalize(window, min_val, max_val, spec['vmin'], spec['vmax'])
        heatmaps[i] = print_df(mkdf(scaled))

    # NOTE: the per-column heatmap PNG export of the earlier version stays
    # disabled here; only the combined figure is produced.

    # One figure per cycle with A, B and C side by side.
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20,10))
    for ax, (name, spec) in zip(axes, panels.items()):
        # Draw straight into the axes created by plt.subplots instead of
        # re-selecting them through plt.subplot(13x).
        sns.heatmap(heatmaps[name], vmin=spec['vmin'], vmax=spec['vmax'],
                    cmap="coolwarm", cbar=True,
                    cbar_kws={"ticks": spec['ticks']}, ax=ax, **spec['extra'])
        # The colour bar created by heatmap is the axes added last to the figure.
        fig.axes[-1].set_ylabel(spec['label'], size=20)
        ax.set_title(name, fontsize=12, color='black', loc='left', style='italic')
        ax.axis('off')

    fig.suptitle(f'Analysis of data in cycle Nr.: {count}', color='yellow', backgroundcolor='black', fontsize=48, fontweight='bold')
    fig.subplots_adjust(top=0.7, bottom=0.3, left=0.05, right=0.95, hspace=0.2, wspace=0.2)

    # The combined figure keeps its previous location: the directory of the last
    # column, with that column name repeated twice in the file name.  The name is
    # now taken explicitly instead of from the leaked inner-loop variable.
    combined = df.columns[-1]
    fig.savefig(f'{combined}/{combined}{combined}{count}.png')
    plt.show()
    # Release the figure so that open figures do not pile up over many cycles.
    plt.close(fig)

This gets you the following three images as three separate figures with the data you provided:

Figure 1 , Figure 2 , Figure 3

Generally speaking, your code is quite messy. I get it, if you're new to programming and just want to analyse your data, you do whatever works, doesn't matter if it is pretty.

However, I think that the messy code means you can't properly look at the underlying logic of your script, which is how you got this problem.

I would recommend if you get a problem like that again to write out some 'pseudo code' with all of the loops and try to think about what you are trying to accomplish in each loop.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM