Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# K-Means clustering of Titanic passengers on standardized numeric features,
# followed by a scatter plot of the first two features with the centroids.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

# Load Titanic dataset
titanic = sns.load_dataset("titanic")

# Select numeric features. The order matters: columns 0 and 1 of the scaled
# matrix (age, fare) are the ones drawn in the scatter plot below.
features = ['age', 'fare', 'pclass', 'sibsp', 'parch']
titanic_numeric = titanic[features]

# Drop rows with missing values
titanic_numeric = titanic_numeric.dropna()

# Extract target (for visual comparison): Survived.
# Indexing by the surviving row labels keeps it aligned with the rows that
# were not dropped. It is not used by the plot below.
target = titanic.loc[titanic_numeric.index, 'survived']

# Standardize features so that no single column dominates the distance
# computation because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(titanic_numeric)

# 1. KMeans Clustering
# n_init is pinned explicitly: its default differs between scikit-learn
# releases (10 vs. 'auto'), which would otherwise change the result and emit
# a FutureWarning on some versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

# Plot the first two standardized features, coloured by cluster label.
plt.figure(figsize=(6, 5))
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans_labels, cmap='viridis', s=50)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=200, c='red', marker='X', label='Centroids')
# Label the axes from the feature list so they stay correct if it is reordered.
plt.xlabel(f"{features[0]} (standardized)")
plt.ylabel(f"{features[1]} (standardized)")
plt.title("K-Means Clustering on Titanic Dataset")
plt.legend()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement