Advertisement
GamerBhai02

DS Exp 4

Mar 26th, 2025 (edited)
227
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.57 KB | Source Code | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. data = [1,2,5,6,3,1,1,7,2,2,2,3,1,1,2,8,10,50]
  4. fig = plt.figure(figsize=(7,2))
  5. ax = fig.add_axes([0,0,1,1])
  6. bp = ax.boxplot(data,vert=False)
  7. plt.show()
  8. mean = np.mean(data)
  9. std = np.std(data)
  10. print('Mean of the dataset is',mean)
  11. print('Standard Deviation is',std)
  12. threshold = 3
  13. outlier = []
  14. for i in data:
  15.     z=(i-mean)/std
  16.     if z>threshold:
  17.         outlier.append(i)
  18. print(outlier)
  19.  
  20. data =  np.array([1,7,8,9,10,11,12,30])
  21. q1,q3=np.percentile(data,[25,75])
  22. iqr=q3-q1
  23. lower_bound = q1-1.5*iqr
  24. upper_bound = q3+1.5*iqr
  25. outliers=data[(data<lower_bound) | (data>upper_bound)]
  26. print(outliers)
  27.  
  28. import numpy as np
  29. import pandas as pd
  30. import seaborn as sns
  31. import matplotlib.pyplot as plt
  32. from sklearn.ensemble import IsolationForest
  33.  
  34. df = pd.read_csv("salary.csv")
  35. display(df.head())
  36.  
  37. sns.violinplot(df[['Salary']])
  38.  
  39. model = IsolationForest(n_estimators=50,max_samples='auto',contamination=float(0.1),max_features=1.0)
  40. model.fit(df[['Salary']])
  41.  
  42. df['scores'] = model.decision_function(df[['Salary']])
  43. df['anomaly'] = model.predict(df[['Salary']])
  44. display(df.head())
  45.  
  46. anomaly = df.loc[df['anomaly']==-1]
  47. anomaly_index = list(anomaly.index)
  48. display(anomaly)
  49.  
  50. import pandas as pd
  51. import numpy as np
  52. import seaborn as sns
  53. import matplotlib.pyplot as plt
  54.  
  55. # Generate data with outliers
  56. np.random.seed(42)
  57. data = pd.DataFrame({
  58.     'value': np.concatenate([np.random.normal(0, 1, 90), np.array([-5, 5, 10])])
  59. })
  60.  
  61. # --- Standard Deviation ---
  62. mean = data['value'].mean()
  63. std = data['value'].std()
  64. data_std_cleaned = data[(data['value'] >= mean - 2*std) & (data['value'] <= mean + 2*std)]
  65.  
  66. # --- IQR ---
  67. Q1 = data['value'].quantile(0.25)
  68. Q3 = data['value'].quantile(0.75)
  69. IQR = Q3 - Q1
  70. data_iqr_cleaned = data[(data['value'] >= Q1 - 1.5*IQR) & (data['value'] <= Q3 + 1.5*IQR)]
  71.  
  72. # --- Plotting ---
  73. plt.figure(figsize=(10, 5))
  74.  
  75. plt.subplot(1, 3, 1)
  76. sns.boxplot(y=data['value'])
  77. plt.title('Original Data')
  78.  
  79. plt.subplot(1, 3, 2)
  80. sns.boxplot(y=data_std_cleaned['value'])
  81. plt.title('Std Dev Cleaned')
  82.  
  83. plt.subplot(1, 3, 3)
  84. sns.boxplot(y=data_iqr_cleaned['value'])
  85. plt.title('IQR Cleaned')
  86.  
  87. plt.tight_layout()
  88. plt.show()
  89.  
  90. plt.figure(figsize=(10, 5))
  91.  
  92. plt.subplot(1, 3, 1)
  93. plt.scatter(data.index, data['value'])
  94. plt.title('Original Data')
  95.  
  96. plt.subplot(1, 3, 2)
  97. plt.scatter(data_std_cleaned.index, data_std_cleaned['value'])
  98. plt.title('Std Dev Cleaned')
  99.  
  100. plt.subplot(1, 3, 3)
  101. plt.scatter(data_iqr_cleaned.index, data_iqr_cleaned['value'])
  102. plt.title('IQR Cleaned')
  103.  
  104. plt.tight_layout()
  105. plt.show()
  106.  
Tags: Exp4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement