Module 10.2 Means

gk231192

Jul 7th, 2025 (edited)

350

Never

Add comment

Not a member of Pastebin yet? Sign Up, it unlocks many cool features!

Python 1.14 KB | None | 0 0

raw download clone embed print report

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
# --- Data preparation -------------------------------------------------------
# Pull the Titanic passenger table through seaborn's dataset loader.
titanic = sns.load_dataset("titanic")

# Numeric columns that serve as clustering inputs.
features = ['age', 'fare', 'pclass', 'sibsp', 'parch']

# Restrict to those columns and discard every passenger that has a missing
# value in any of them, in a single chained step.
titanic_numeric = titanic[features].dropna()

# Survival flag for the retained passengers only; indexing by the filtered
# frame's index keeps it row-aligned with titanic_numeric (kept for visual
# comparison against the clusters).
target = titanic['survived'].loc[titanic_numeric.index]

# Standardize the features so that no single column dominates the
# distance computations because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(titanic_numeric)
# 1. KMeans Clustering
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4 (and emits a FutureWarning in 1.2-1.3 when it is left
# unset), so relying on the default makes the clusters depend on the
# installed library version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

# The model is fitted on all standardized features, but only the first two
# columns (features[0] and features[1]) are drawn, so the scatter plot is a
# 2-D view of a higher-dimensional clustering.
plt.figure(figsize=(6, 5))
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans_labels, cmap='viridis', s=50)
plt.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    s=200,
    c='red',
    marker='X',
    label='Centroids',
)
# Label the axes so the reader knows which (standardized) features are shown.
plt.xlabel(f"{features[0]} (standardized)")
plt.ylabel(f"{features[1]} (standardized)")
plt.title("K-Means Clustering on Titanic Dataset")
plt.legend()
plt.show()

Add Comment

Please, Sign In to add comment