Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Mean imputation demo: load the horse-colic CSV, report how many values are
# missing in each column, fill them with SimpleImputer(strategy='mean') and
# confirm that no NaN is left afterwards.
from numpy import isnan
from pandas import read_csv
from sklearn.impute import SimpleImputer

url = "https://raw.githubusercontent.com/jbrownlee/Datasets/refs/heads/master/horse-colic.csv"
# The file has no header row; every '?' cell is parsed as NaN.
df = read_csv(url, header=None, na_values='?')
data = df.values

# Count the NaNs of all columns in one pass. Iterating the resulting Series
# yields plain scalars, so the %d / %.1f formatting below no longer depends on
# the deprecated implicit conversion of a one-element Series (which is what
# df[[i]].isnull().sum() produced) to int/float.
n_rows = df.shape[0]
for i, n_miss in df.isnull().sum().items():
    perc = n_miss / n_rows * 100
    print('> %d, Missing: %d (%.1f%%)' % (i, n_miss, perc))

# Learn the per-column means, then replace every NaN with its column's mean.
imputer = SimpleImputer(strategy='mean')
imputer.fit(data)
Xtrans = imputer.transform(data)

# Vectorised NaN count of the imputed array.
print('Missing: %d' % isnan(Xtrans).sum())
# Iterative imputation demo: load the horse-colic CSV, report how many values
# are missing in each column, fill them with IterativeImputer (default
# settings) and confirm that no NaN is left afterwards.
from numpy import isnan
from pandas import read_csv
# IterativeImputer is experimental: this import must run before it can be
# imported from sklearn.impute, even though the name itself is never used.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

url = "https://raw.githubusercontent.com/jbrownlee/Datasets/refs/heads/master/horse-colic.csv"
# The file has no header row; every '?' cell is parsed as NaN.
df = read_csv(url, header=None, na_values='?')
data = df.values

# Count the NaNs of all columns in one pass. Iterating the resulting Series
# yields plain scalars, so the %d / %.1f formatting below no longer depends on
# the deprecated implicit conversion of a one-element Series (which is what
# df[[i]].isnull().sum() produced) to int/float.
n_rows = df.shape[0]
for i, n_miss in df.isnull().sum().items():
    perc = n_miss / n_rows * 100
    print('> %d, Missing: %d (%.1f%%)' % (i, n_miss, perc))

# Fit the imputer on the full array, then fill the NaNs of that same array.
imputer = IterativeImputer()
imputer.fit(data)
Xtrans = imputer.transform(data)

# Vectorised NaN count of the imputed array.
print('Missing: %d' % isnan(Xtrans).sum())
# KNN imputation demo: load the horse-colic CSV, report how many values are
# missing in each column, fill them with KNNImputer (default settings) and
# confirm that no NaN is left afterwards.
from numpy import isnan
from pandas import read_csv
from sklearn.impute import KNNImputer

url = "https://raw.githubusercontent.com/jbrownlee/Datasets/refs/heads/master/horse-colic.csv"
# The file has no header row; every '?' cell is parsed as NaN.
df = read_csv(url, header=None, na_values='?')
data = df.values

# Count the NaNs of all columns in one pass. Iterating the resulting Series
# yields plain scalars, so the %d / %.1f formatting below no longer depends on
# the deprecated implicit conversion of a one-element Series (which is what
# df[[i]].isnull().sum() produced) to int/float.
n_rows = df.shape[0]
for i, n_miss in df.isnull().sum().items():
    perc = n_miss / n_rows * 100
    print('> %d, Missing: %d (%.1f%%)' % (i, n_miss, perc))

# Fit the imputer on the full array, then fill the NaNs of that same array.
imputer = KNNImputer()
imputer.fit(data)
Xtrans = imputer.transform(data)

# Vectorised NaN count of the imputed array.
print('Missing: %d' % isnan(Xtrans).sum())
# ASSIGNMENT 8
# Load the Pima Indians diabetes CSV, report how many values are missing in
# each column, then run the mean, iterative and KNN imputers over the data in
# turn and print how many NaNs remain after each one.
from numpy import isnan
from pandas import read_csv
# IterativeImputer is experimental: this import must run before it can be
# imported from sklearn.impute, even though the name itself is never used.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer

url = "https://raw.githubusercontent.com/jbrownlee/Datasets/refs/heads/master/pima-indians-diabetes.data.csv"
# The file has no header row; every '?' cell is parsed as NaN.
# NOTE(review): this dataset presumably encodes missing measurements as 0
# rather than '?', in which case no NaN is detected here and the imputers
# below have nothing to fill -- confirm against the dataset description.
df = read_csv(url, header=None, na_values='?')
data = df.values

# Count the NaNs of all columns in one pass. Iterating the resulting Series
# yields plain scalars, so the %d / %.1f formatting below no longer depends on
# the deprecated implicit conversion of a one-element Series (which is what
# df[[i]].isnull().sum() produced) to int/float.
n_rows = df.shape[0]
for i, n_miss in df.isnull().sum().items():
    perc = n_miss / n_rows * 100
    print('> %d, Missing: %d (%.1f%%)' % (i, n_miss, perc))

# Same fit / transform / report steps for each strategy, in the original
# order; `imputer` and `Xtrans` end up holding the KNN results.
for imputer in (SimpleImputer(strategy='mean'), IterativeImputer(), KNNImputer()):
    imputer.fit(data)
    Xtrans = imputer.transform(data)
    print('Missing: %d' % isnan(Xtrans).sum())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement