Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# 1. Data Set Up
#
# Prepares the Titanic data for modelling: loads the dataset, keeps a small
# set of columns, encodes `sex` numerically, splits into train/test sets and
# standardizes the features.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

# Load the Titanic dataset through seaborn's dataset loader.
titanic = sns.load_dataset('titanic')

# Keep only the selected columns and drop rows with missing values.
# The explicit .copy() makes `df` an independent frame, so the column
# assignment below cannot raise pandas' SettingWithCopyWarning.
df = titanic[['survived', 'sex', 'age', 'fare', 'pclass']].dropna().copy()

# Encode sex as an integer feature: male -> 0, female -> 1.
df['sex'] = df['sex'].map({'male': 0, 'female': 1})

# Feature matrix and target vector.
X = df[['sex', 'age', 'fare', 'pclass']]
y = df['survived']

# Train-test split: 30% of the rows are held out for testing; the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Feature scaling: the scaler is fitted on the training set only and the
# same fitted transform is then applied to the test set, so no test-set
# statistics influence the scaling parameters.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement