rkrahul

k-mean

Jun 17th, 2024
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.74 KB | Source Code | 0 0
  1. from sklearn.cluster import KMeans
  2. import pandas as pd
  3. from sklearn.preprocessing import MinMaxScaler
  4. from matplotlib import pyplot as plt
  5. url='https://raw.githubusercontent.com/codebasics/py/master/ML/13_kmeans/i
  6. ncome.csv'
  7. df = pd.read_csv(url)
  8. df.head()
  9. plt.scatter(df.Age,df['Income($)'])
  10. plt.xlabel('Age')
  11. plt.ylabel('Income($)')
  12. ------------------------------------------------------------------------------------------------------------------------------------------
  13. km = KMeans(n_clusters=3)
  14. y_predicted = km.fit_predict(df[['Age','Income($)']])
  15. y_predicted
  16. df['cluster']=y_predicted
  17. df.head()
  18. km.cluster_centers_
  19. df1 = df[df.cluster==0]
  20. df2 = df[df.cluster==1]
  21. df3 = df[df.cluster==2]
  22. plt.scatter(df1.Age,df1['Income($)'],color='green')
  23. plt.scatter(df2.Age,df2['Income($)'],color='red')
  24. plt.scatter(df3.Age,df3['Income($)'],color='black')
  25. plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],color='purpl
  26. e',marker='*',label=
  27. 'centroid')
  28. plt.xlabel('Age')
  29. plt.ylabel('Income ($)')
  30. plt.legend()
  31. scaler = MinMaxScaler()
  32. scaler.fit(df[['Income($)']])
  33. df['Income($)'] = scaler.transform(df[['Income($)']])
  34. scaler.fit(df[['Age']])
  35. df['Age'] = scaler.transform(df[['Age']])
  36. df.head()
  37. plt.scatter(df.Age,df['Income($)'])
  38.  
  39. km = KMeans(n_clusters=3)
  40. y_predicted = km.fit_predict(df[['Age','Income($)']])
  41. y_predicted
  42. df['cluster']=y_predicted
  43. df.head()
  44. km.cluster_centers_
  45. df1 = df[df.cluster==0]
  46. df2 = df[df.cluster==1]
  47. df3 = df[df.cluster==2]
  48. plt.scatter(df1.Age,df1['Income($)'],color='green')
  49. plt.scatter(df2.Age,df2['Income($)'],color='red')
  50. plt.scatter(df3.Age,df3['Income($)'],color='black')
  51. plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],color='purpl
  52. e',marker='*',label=
  53. 'centroid')
  54. plt.legend()
Add Comment
Please, Sign In to add comment