Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import matplotlib.pyplot as plt
# Sample dataset: mostly small values plus one much larger reading (50).
data = [1, 2, 5, 6, 3, 1, 1, 7, 2, 2, 2, 3, 1, 1, 2, 8, 10, 50]

# Horizontal box plot drawn on axes that fill the whole 7x2-inch figure.
fig = plt.figure(figsize=(7, 2))
ax = fig.add_axes([0, 0, 1, 1])
bp = ax.boxplot(data, vert=False)
plt.show()
# Z-score outlier detection: flag every value of `data` that lies more than
# `threshold` standard deviations away from the mean.
mean = np.mean(data)
std = np.std(data)  # population standard deviation (NumPy default, ddof=0)
print('Mean of the dataset is', mean)
print('Standard Deviation is', std)

threshold = 3
# Compare the absolute z-score so that unusually low values are flagged as
# well as unusually high ones; a bare `z > threshold` only catches the latter.
outlier = [value for value in data if abs((value - mean) / std) > threshold]
print(outlier)
# IQR (Tukey fence) outlier detection on a small NumPy array.
data = np.array([1, 7, 8, 9, 10, 11, 12, 30])

# First and third quartiles; the interquartile range is their difference.
q1, q3 = np.percentile(data, [25, 75])
iqr = q3 - q1

# Anything further than 1.5 * IQR outside the quartiles counts as an outlier.
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr

# Boolean-mask selection of the values outside the fences (either side).
outliers = data[(data < lower_bound) | (data > upper_bound)]
print(outliers)
- import numpy as np
- import pandas as pd
- import seaborn as sns
- import matplotlib.pyplot as plt
- from sklearn.ensemble import IsolationForest
# Isolation Forest anomaly detection on the 'Salary' column of salary.csv.
df = pd.read_csv("salary.csv")
# NOTE(review): `display` is not imported here; this assumes a Jupyter/IPython
# session where it is available as a built-in.
display(df.head())
sns.violinplot(df[['Salary']])

# Single-feature frame used for fitting and scoring; selected once, before the
# result columns are added to `df`.
salary = df[['Salary']]

# contamination=0.1 asks the model to treat about 10% of the rows as anomalies.
# NOTE(review): no random_state is passed, so results may differ between runs.
model = IsolationForest(
    n_estimators=50,
    max_samples='auto',
    contamination=0.1,
    max_features=1.0,
)
model.fit(salary)
df['scores'] = model.decision_function(salary)
df['anomaly'] = model.predict(salary)
display(df.head())

# Rows whose prediction is -1 are the ones reported as anomalies.
anomaly = df.loc[df['anomaly'] == -1]
anomaly_index = list(anomaly.index)
display(anomaly)
- import pandas as pd
- import numpy as np
- import seaborn as sns
- import matplotlib.pyplot as plt
# Generate data with outliers: 90 draws from a standard normal distribution
# (seeded for reproducibility) followed by three hand-picked extreme values.
np.random.seed(42)
data = pd.DataFrame({
    'value': np.concatenate([np.random.normal(0, 1, 90), np.array([-5, 5, 10])]),
})

# --- Standard Deviation ---
# Keep rows within two standard deviations of the mean (bounds inclusive).
mean = data['value'].mean()
std = data['value'].std()  # sample standard deviation (pandas default, ddof=1)
data_std_cleaned = data[data['value'].between(mean - 2 * std, mean + 2 * std)]

# --- IQR ---
# Keep rows within 1.5 * IQR of the first/third quartiles (bounds inclusive).
Q1 = data['value'].quantile(0.25)
Q3 = data['value'].quantile(0.75)
IQR = Q3 - Q1
data_iqr_cleaned = data[data['value'].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)]
# --- Plotting ---
# Side-by-side box plots of the original data and the two cleaned versions.
boxplot_panels = [
    ('Original Data', data),
    ('Std Dev Cleaned', data_std_cleaned),
    ('IQR Cleaned', data_iqr_cleaned),
]
plt.figure(figsize=(10, 5))
for position, (title, frame) in enumerate(boxplot_panels, start=1):
    plt.subplot(1, 3, position)  # one row, three columns
    sns.boxplot(y=frame['value'])
    plt.title(title)
plt.tight_layout()
plt.show()
# Side-by-side scatter plots (row index vs. value) of the original data and
# the two cleaned versions; removed rows show up as gaps along the x axis.
scatter_panels = [
    ('Original Data', data),
    ('Std Dev Cleaned', data_std_cleaned),
    ('IQR Cleaned', data_iqr_cleaned),
]
plt.figure(figsize=(10, 5))
for position, (title, frame) in enumerate(scatter_panels, start=1):
    plt.subplot(1, 3, position)  # one row, three columns
    plt.scatter(frame.index, frame['value'])
    plt.title(title)
plt.tight_layout()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement