Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Script: project the numeric Titanic features onto their first two principal
# components, scatter-plot the projection coloured by the 'survived' column,
# and print the explained variance ratio of the two components.

import matplotlib.pyplot as plt
import pandas as pd  # not referenced in this section; kept from the original import list
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE  # not referenced in this section; kept from the original import list
from sklearn.preprocessing import StandardScaler

# Load Titanic dataset
titanic = sns.load_dataset("titanic")

# Preview data
print(titanic.head())

# Select numeric features and drop rows with missing values in any of them.
# dropna() keeps the original row labels, which the target lookup below uses.
features = ['age', 'fare', 'pclass', 'sibsp', 'parch']
titanic_numeric = titanic[features].dropna()

# Extract target (for visualization): Survived.
# Indexing by titanic_numeric.index keeps the target aligned row-for-row with
# the rows that survived the dropna() above.
target = titanic.loc[titanic_numeric.index, 'survived']

# Standardize features so that columns on larger numeric scales do not
# dominate the principal components.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(titanic_numeric)

# Apply PCA, keeping the first two components for a 2-D plot
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Plot PCA: one point per retained passenger row, coloured by the target
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=target, cmap='coolwarm', alpha=0.6)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA on Titanic Data')
plt.colorbar(label='Survived')
plt.grid(True)
plt.show()

# Print explained variance (fraction of total variance captured per component)
print("Explained variance ratio:", pca.explained_variance_ratio_)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement