Advertisement
gk231192

Module 10.2 Means

Jul 7th, 2025 (edited)
350
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.14 KB | None | 0 0
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3. import seaborn as sns
  4. from sklearn.preprocessing import StandardScaler
  5. from sklearn.cluster import KMeans, AgglomerativeClustering
  6. from scipy.cluster.hierarchy import dendrogram, linkage
  7.  
  8. # Load Titanic dataset
  9. titanic = sns.load_dataset("titanic")
  10.  
  11. # Select numeric features
  12. features = ['age', 'fare', 'pclass', 'sibsp', 'parch']
  13. titanic_numeric = titanic[features]
  14.  
  15. # Drop rows with missing values
  16. titanic_numeric = titanic_numeric.dropna()
  17.  
  18. # Extract target (for visual comparison): Survived
  19. target = titanic.loc[titanic_numeric.index, 'survived']
  20.  
  21. # Standardize features
  22. scaler = StandardScaler()
  23. X_scaled = scaler.fit_transform(titanic_numeric)
  24.  
  25. # 1. KMeans Clustering
  26. kmeans = KMeans(n_clusters=3, random_state=42)
  27. kmeans_labels = kmeans.fit_predict(X_scaled)
  28.  
  29. plt.figure(figsize=(6, 5))
  30. plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans_labels, cmap='viridis', s=50)
  31. plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
  32.             s=200, c='red', marker='X', label='Centroids')
  33. plt.title("K-Means Clustering on Titanic Dataset")
  34. plt.legend()
  35. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement