Advertisement
rkrahul

linear, logistic reg

Jun 17th, 2024
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.76 KB | None | 0 0
  1. import pandas as pd from sklearn.model_selection
  2. import train_test_split from sklearn.linear_model
  3. import LinearRegression
  4. # Read the data
  5. url =
  6. 'https://raw.githubusercontent.com/codebasics/py/master/ML/1_linear_reg/Exer
  7. c ise/canada_per_capita_income.csv' data = pd.read_csv(url)
  8. # Prepare the data
  9. X = data["year"].values.reshape(-1, 1) # Features: Year y = data["per
  10. capita income (US$)"] # Target variable: Per capita income
  11. # Split the data into training and testing sets ( optional )
  12. # X_train, X_test, y_train, y_test = train_test_split(X, y,
  13. test_size=0.2, random_state=42)
  14. # Build the regression model
  15. model = LinearRegression()
  16. model.fit(X, y)
  17. # Make predictions for the year 2020 year_2020 =
  18. [[2020]] predicted_income_2020 =
  19. model.predict(year_2020)
  20. print("Predicted per capita income for Canada in 2020:",
  21. predicted_income_2020[0])
  22.  
  23.  
  24. import pandas as pd import seaborn as sns import
  25. matplotlib.pyplot as plt from sklearn.model_selection import
  26. train_test_split from sklearn.linear_model import
  27. LogisticRegression from sklearn.metrics import accuracy_score,
  28. classification_report
  29. # Step 1: Data Loading and Exploration
  30. # Load the dataset
  31. url =
  32. 'https://raw.githubusercontent.com/OnkarMalawade/MCA-SEM-II-
  33. AIML/main/HR_ comma_sep.csv' hr_data = pd.read_csv(url)
  34. # Explore the dataset
  35. print(hr_data.head())
  36. print(hr_data.info())
  37. print(hr_data.describe()) # Step 2:
  38. Identify Impactful Variables
  39. # Conduct exploratory data analysis
  40. # Step 3: Plot Bar Charts for Salary Impact
  41. sns.countplot(x='salary', hue='left', data=hr_data)
  42. plt.title('Impact of Salary on Retention')
  43. plt.xlabel('Salary Level') plt.ylabel('Count')
  44. plt.show()
  45. # Step 4: Plot Bar Charts for Department Impact
  46. sns.countplot(x='Department', hue='left', data=hr_data)
  47. plt.title('Impact of Department on Retention')
  48. plt.xlabel('Department') plt.ylabel('Count')
  49. plt.xticks(rotation=45) plt.show()
  50. # Step 5: Build Logistic Regression Model
  51. # Define predictors and target variable
  52. X = hr_data[['satisfaction_level', 'last_evaluation', 'number_project',
  53. 'average_montly_hours', 'time_spend_company', 'Work_accident',
  54. 'promotion_last_5years', 'salary', 'Department']]
  55. y = hr_data['left']
  56. # Convert categorical variables to dummy variables
  57. X = pd.get_dummies(X, drop_first=True)
  58. # Split the data into training and testing sets
  59. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  60. # Build the logistic regression model
  61. model = LogisticRegression()
  62. model.fit(X_train, y_train)
  63. # Step 6: Model Evaluation #
  64. Make predictions y_pred =
  65. model.predict(X_test)
  66. # Calculate accuracy accuracy =
  67. accuracy_score(y_test, y_pred)
  68. print("Accuracy:", accuracy)
  69. # Print classification report
  70. print(classification_report(y_test, y_pred))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement