Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Script 1: fit a simple linear regression of Canada's per capita income on
# the year, then print the model's prediction for 2020.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Read the data
url = (
    "https://raw.githubusercontent.com/codebasics/py/master/ML/"
    "1_linear_reg/Exercise/canada_per_capita_income.csv"
)
data = pd.read_csv(url)

# Prepare the data
# reshape(-1, 1) turns the 1-D year column into a single-feature 2-D array,
# which is the layout the estimator's fit/predict calls are given here.
X = data["year"].values.reshape(-1, 1)  # Features: Year
y = data["per capita income (US$)"]  # Target variable: Per capita income

# Split the data into training and testing sets (optional)
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42)

# Build the regression model (trained on the full dataset, since the split
# above is left disabled)
model = LinearRegression()
model.fit(X, y)

# Make predictions for the year 2020
year_2020 = [[2020]]  # one sample with one feature
predicted_income_2020 = model.predict(year_2020)
print(
    "Predicted per capita income for Canada in 2020:",
    predicted_income_2020[0],
)
# Script 2: explore an HR dataset, plot how salary and department relate to
# employees leaving, then fit and evaluate a logistic regression on 'left'.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Step 1: Data Loading and Exploration
# Load the dataset
url = (
    "https://raw.githubusercontent.com/OnkarMalawade/MCA-SEM-II-AIML/"
    "main/HR_comma_sep.csv"
)
hr_data = pd.read_csv(url)

# Explore the dataset
print(hr_data.head())
# DataFrame.info() writes its report itself and returns None, so this also
# prints a trailing "None"; kept as in the original script.
print(hr_data.info())
print(hr_data.describe())

# Step 2: Identify Impactful Variables
# Conduct exploratory data analysis

# Step 3: Plot Bar Charts for Salary Impact
sns.countplot(x='salary', hue='left', data=hr_data)
plt.title('Impact of Salary on Retention')
plt.xlabel('Salary Level')
plt.ylabel('Count')
plt.show()

# Step 4: Plot Bar Charts for Department Impact
sns.countplot(x='Department', hue='left', data=hr_data)
plt.title('Impact of Department on Retention')
plt.xlabel('Department')
plt.ylabel('Count')
plt.xticks(rotation=45)  # tilt the tick labels
plt.show()

# Step 5: Build Logistic Regression Model
# Define predictors and target variable
# NOTE: 'average_montly_hours' is spelled as in the original script; it is
# used as a column key, so it must not be "corrected" here.
X = hr_data[[
    'satisfaction_level',
    'last_evaluation',
    'number_project',
    'average_montly_hours',
    'time_spend_company',
    'Work_accident',
    'promotion_last_5years',
    'salary',
    'Department',
]]
y = hr_data['left']

# Convert categorical variables to dummy variables
# (drop_first=True omits one level per categorical column)
X = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the logistic regression model
# NOTE(review): features are not scaled; if the solver reports a convergence
# warning, consider scaling or raising max_iter - confirm on a real run.
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Model Evaluation
# Make predictions
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Print classification report
print(classification_report(y_test, y_pred))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement