Advertisement
GamerBhai02

DS Exp 3

Mar 19th, 2025 (edited)
179
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.10 KB | Source Code | 0 0
import sys

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold, StratifiedKFold

  7. file_path = 'C:\\Users\\Test\\Desktop\\spam_data.csv'
  8. data = pd.read_csv(file_path, encoding='latin1')
  9.  
  10. data['v1'] = data['v1'].map({'ham': 0, 'spam': 1})
  11.  
  12. X = data['v2']
  13. y = data['v1']
  14.  
  15. vectorizer = CountVectorizer()
  16. X_vectorized = vectorizer.fit_transform(X)
  17.  
  18. kf = KFold(n_splits=10)
  19. accuracies = []
  20.  
  21. for train_index, test_index in kf.split(X_vectorized):
  22.     X_train, X_test = X_vectorized[train_index], X_vectorized[test_index]
  23.     y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  24.  
  25.     model = LogisticRegression(max_iter=1000)
  26.     model.fit(X_train, y_train)
  27.  
  28.     y_pred = model.predict(X_test)
  29.  
  30.     accuracy = accuracy_score(y_test, y_pred)
  31.     accuracies.append(accuracy)
  32.  
  33.     print(classification_report(y_test, y_pred, target_names=['ham', 'spam']))
  34.  
  35. print(f'Average accuracy over 10 folds: {sum(accuracies) / len(accuracies):.4f}')
Tags: Exp 3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement