Advertisement
gk231192

Module 10.3 PCA

Jul 7th, 2025
323
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.15 KB | None | 0 0
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3. from sklearn.preprocessing import StandardScaler
  4. from sklearn.decomposition import PCA
  5. from sklearn.manifold import TSNE
  6. import seaborn as sns
  7.  
  8. # Load Titanic dataset
  9. titanic = sns.load_dataset("titanic")
  10.  
  11. # Preview data
  12. print(titanic.head())
  13.  
  14. # Select numeric features
  15. features = ['age', 'fare', 'pclass', 'sibsp', 'parch']
  16. titanic_numeric = titanic[features]
  17.  
  18. # Drop rows with missing values
  19. titanic_numeric = titanic_numeric.dropna()
  20.  
  21. # Extract target (for visualization): Survived
  22. target = titanic.loc[titanic_numeric.index, 'survived']
  23.  
  24. # Standardize features
  25. scaler = StandardScaler()
  26. X_scaled = scaler.fit_transform(titanic_numeric)
  27.  
  28. # Apply PCA
  29. pca = PCA(n_components=2)
  30. X_pca = pca.fit_transform(X_scaled)
  31.  
  32. # Plot PCA
  33. plt.figure(figsize=(8, 6))
  34. plt.scatter(X_pca[:, 0], X_pca[:, 1], c=target, cmap='coolwarm', alpha=0.6)
  35. plt.xlabel('Principal Component 1')
  36. plt.ylabel('Principal Component 2')
  37. plt.title('PCA on Titanic Data')
  38. plt.colorbar(label='Survived')
  39. plt.grid(True)
  40. plt.show()
  41.  
  42. # Print explained variance
  43. print("Explained variance ratio:", pca.explained_variance_ratio_)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement