samedi 25 juin 2016

Matplotlib class constructor

I am trying to turn an IPython notebook script into a true object-oriented program. I have set up all my classes and methods successfully (the methods are not complicated). However, I still have to complete a final class, which must include a chart drawn with matplotlib. I tried many combinations in the constructor to define my objects, but nothing works and I am lost. For example, I have: self.pos = pos, and Python returns: name 'pos' is not defined. I really don't understand why. Can you advise me on the elements that I have to put in my constructor? So, my original script is below. class RfmDataViz(object): #Dataviz of the clustering # Constructor def __init__(self,pos): self.pos = pos # End of the constructor Constructor def RfmDataVizPlot ( self ): # Define the position of the bar self.pos = list(range(len(avant_viz[attribut_monetaire]))) width = 0.25 # Draw the bar fig, ax = plt.subplots(figsize=(14,5)) # Draw the bar for the monetary value , # In position pos ,, #with the attribute [''] in dataframe avant_viz, avant_viz[exc.m], # setting the width width, # setting the alpha alpha=0.5, # setting color color='#EE3224', # setting label label=avant_viz['cluster'][0]) # Draw the bar for the recency value ,[p + width for p in self.pos], avant_viz[exc.r], width, alpha=0.5, color='#F78F1E', label=avant_viz['cluster'][1]) # Draw the bar for the frequency value ,[p + width*2 for p in self.pos], avant_viz[exc.f], width, alpha=0.5, color='#FFC222', label=avant_viz['cluster'][2]) # define label for Y ax.set_ylabel('Score') # Title of my graph ax.set_title('Médiane score RFM par Cluster') # setting the position of the label on y ax.set_xticks([p + 1.5 * width for p in self.pos]) # define label for x ax.set_xticklabels(avant_viz['cluster']) # define the limit of axe x et axe y plt.xlim(min(self.pos)-width, max(self.pos)+width*4) plt.ylim([0, max(avant_viz['Monetary_Score'] + avant_viz['Recency_Score'] + avant_viz['Recency_Score'])] ) # add legend plt.legend(['Monetary_Score', 'Recency_Score', 
'Recency_Score'], loc='upper left') plt.grid() MAIN # -*- coding: utf-8 -*- """ Created on Thu Jun 23 16:53:39 2016 ? @author: """ # -*- coding: utf -*- #call the module import LoadingFile as EM import clustering as C import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.cluster import KMeans import sys import os #Loading the class exc = EM.XlsLoading() rfmc = C.RfmClustering() #Loading and Transform xls in a pandas dataframe------------------------------ exc.LoadingXlsFile() #Display xls file into a pandas dataframe xls_file = pd.ExcelFile(exc.namefile) #Select the good sheets for pandas dataframe exc.XlsSelectTheGoodSheet() customersdf = xls_file.parse(exc.yoursheet) headers = customersdf.dtypes.index print (headers) #Feature Selection for clustering exc.SelectFeaturesForClustering() customersdf = customersdf[[exc.r,exc.f,exc.m]] print ( #Clustering----------------------------------------------------------------- rfmc.DoKmeans() clustering_model = KMeans(n_clusters=rfmc.nb_cls,precompute_distances=True) #Fit Kmeans clusters = clustering_model.fit_predict(customersdf) #print de inertia print ("Inertia",clustering_model.inertia_) #Print the silhouette from sklearn.metrics import silhouette_score silhouette = silhouette_score(customersdf.values, clusters, metric='euclidean', sample_size=2000) print ("Silhouette score :", silhouette) # Creation of the final dataframe final = customersdf.join(pd.Series(clusters, index=customersdf.index, name='cluster')) final['cluster'] = final['cluster'].map(lambda cluster_id: 'cluster' + str(cluster_id)) #Print Cluster Size print ("--------------------") print ("Clusters size") print ("--------------------") taille = pd.DataFrame({'size': final['cluster'].value_counts()}) print (taille) #Préparing the final file before downloading customersid = xls_file.parse(exc.yoursheet) customersid = customersid.drop([exc.r,exc.f,exc.m], axis=1) final_clustering = pd.concat([customersid, final], axis=1) 
final_clustering = final_clustering[[,exc.r,exc.f,exc.m,'cluster']] #Median per clusters print ("-------------------------------------") print ("Clusters Median") print ("-------------------------------------") avant_viz = final_clustering.groupby(['cluster',]).aggregate(np.mean).reset_index() print (avant_viz) #EXPORT FINAL CLUSTERING CUSTOMER LIST IN CSV final_clustering.to_csv('customersrfmclustering.csv')

