I am trying to turn an IPython notebook script into a true object-oriented program. I have set up all my classes and methods successfully (none of the methods are complicated).
However, I still have to complete one final class, which must include a chart drawn with matplotlib. I tried many combinations in the constructor to define my objects, but nothing works and I am lost.
For example, I have `self.pos = pos`, and Python returns: name 'pos' is not defined. I really don't understand why.
Can you advise me on the elements that I have to put in my constructor?
So, my original script is below.
class RfmDataViz(object):
    """Grouped bar chart of the per-cluster RFM scores.

    The plot reads two names from module scope rather than from the
    instance: ``avant_viz`` (the per-cluster summary DataFrame, which must
    have a ``'cluster'`` column) and ``exc`` (whose ``m``, ``r`` and ``f``
    attributes name the monetary, recency and frequency columns).  Both
    must exist, together with ``plt``, before ``RfmDataVizPlot`` is called.
    """

    def __init__(self, pos=None):
        """Store the initial bar positions.

        ``pos`` is optional because ``RfmDataVizPlot`` always recomputes
        the positions from the data.  Requiring it forced callers to pass
        a variable they had not defined yet, which is what raised
        ``name 'pos' is not defined``.
        """
        self.pos = pos

    def RfmDataVizPlot(self):
        """Draw one group of three bars (M, R, F) per cluster and show it."""
        # One group per row of the summary table.
        self.pos = list(range(len(avant_viz)))
        width = 0.25

        _fig, ax = plt.subplots(figsize=(14, 5))

        # (column to plot, bar colour, legend entry), in left-to-right order
        # inside each group.
        series = [
            (exc.m, '#EE3224', 'Monetary_Score'),
            (exc.r, '#F78F1E', 'Recency_Score'),
            (exc.f, '#FFC222', 'Frequency_Score'),
        ]
        for rank, (column, colour, legend_name) in enumerate(series):
            # Shift each series by one bar width so the bars sit side by side.
            plt.bar([p + width * rank for p in self.pos],
                    avant_viz[column],
                    width,
                    alpha=0.5,
                    color=colour,
                    # Labelling the bars directly avoids indexing
                    # avant_viz['cluster'][0..2], which fails when there
                    # are fewer than three clusters.
                    label=legend_name)

        ax.set_ylabel('Score')
        ax.set_title('Médiane score RFM par Cluster')

        # Put each tick under its group and name it after the cluster.
        ax.set_xticks([p + 1.5 * width for p in self.pos])
        ax.set_xticklabels(avant_viz['cluster'])

        plt.xlim(min(self.pos) - width, max(self.pos) + width * 4)
        # Upper limit is the largest per-cluster sum of the three plotted
        # columns, which leaves headroom above the tallest bar.  The
        # original summed the recency column twice and never counted
        # frequency.
        plt.ylim([0, max(avant_viz[exc.m] + avant_viz[exc.r] + avant_viz[exc.f])])

        plt.legend(loc='upper left')
        plt.grid()
        plt.show()
MAIN
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 23 16:53:39 2016
?
@author:
"""
# -*- coding: utf -*-
#call the module
import LoadingFile as EM
import clustering as C
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import sys
import os
# Instantiate the project helpers: the workbook loader and the clustering
# settings holder.
exc = EM.XlsLoading()
rfmc = C.RfmClustering()

# --- Load the workbook into a pandas DataFrame -------------------------------
exc.LoadingXlsFile()  # presumably sets exc.namefile, which is read just below
xls_file = pd.ExcelFile(exc.namefile)

# Pick the sheet to analyse (exc.yoursheet is read right after this call).
exc.XlsSelectTheGoodSheet()
customersdf = xls_file.parse(exc.yoursheet)
headers = customersdf.dtypes.index
print(headers)

# Keep only the recency / frequency / monetary columns for clustering.
exc.SelectFeaturesForClustering()
customersdf = customersdf[[exc.r, exc.f, exc.m]]
print(customersdf.info())

# --- Clustering --------------------------------------------------------------
rfmc.DoKmeans()  # presumably sets rfmc.nb_cls, the requested cluster count
# NOTE(review): recent scikit-learn releases no longer accept
# `precompute_distances`; drop the argument if KMeans rejects it - confirm
# against the installed version.
clustering_model = KMeans(n_clusters=rfmc.nb_cls, precompute_distances=True)

# Fit the model and get one cluster id per customer row.
clusters = clustering_model.fit_predict(customersdf)
print("Inertia", clustering_model.inertia_)

# Silhouette score, estimated on a sample of the rows.
from sklearn.metrics import silhouette_score
silhouette = silhouette_score(customersdf.values, clusters, metric='euclidean', sample_size=2000)
print("Silhouette score :", silhouette)

# Attach the cluster id to every row and turn it into a readable label
# ('cluster0', 'cluster1', ...).
final = customersdf.join(pd.Series(clusters, index=customersdf.index, name='cluster'))
final['cluster'] = final['cluster'].map(lambda cluster_id: 'cluster' + str(cluster_id))

# Number of customers in each cluster.
print("--------------------")
print("Clusters size")
print("--------------------")
taille = pd.DataFrame({'size': final['cluster'].value_counts()})
print(taille)

# Rebuild the export table: the original sheet without the RFM columns,
# joined side by side with the clustered RFM columns.
customersid = xls_file.parse(exc.yoursheet)
customersid = customersid.drop([exc.r, exc.f, exc.m], axis=1)
final_clustering = pd.concat([customersid, final], axis=1)
final_clustering = final_clustering[[exc.id, exc.r, exc.f, exc.m, 'cluster']]

# Per-cluster median of every remaining column.  The original aggregated
# with the mean, although this header and the chart title both announce
# the median.
print("-------------------------------------")
print("Clusters Median")
print("-------------------------------------")
avant_viz = final_clustering.groupby(['cluster']).aggregate('median').reset_index()
print(avant_viz)

# Export the clustered customer list as CSV.
final_clustering.to_csv('customersrfmclustering.csv')
Aucun commentaire:
Enregistrer un commentaire