In [38]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
In [39]:
# Load the dataset from Data.csv (a relative path, resolved against the kernel's working directory).
df = pd.read_csv("Data.csv")
In [40]:
# Show the raw frame so the missing entries (NaN) in each column are visible.
df
Out[40]:
Country | Age | Salary | Purchased | |
---|---|---|---|---|
0 | France | 44.0 | 72000.0 | Yes |
1 | Spain | 27.0 | 48000.0 | Yes |
2 | NaN | 30.0 | 54000.0 | NaN |
3 | Spain | 38.0 | 61000.0 | No |
4 | Germany | 40.0 | NaN | Yes |
5 | France | 35.0 | 58000.0 | Yes |
6 | Spain | NaN | 52000.0 | No |
7 | France | 48.0 | 79000.0 | Yes |
8 | Germany | 50.0 | 83000.0 | No |
9 | France | 37.0 | 67000.0 | Yes |
1. Removing Rows with Missing Values
In [41]:
# Preview the frame with every row that has at least one missing value removed.
# The result is only displayed, not assigned, so df itself is left unchanged.
df.dropna(axis=0, how="any")
Out[41]:
Country | Age | Salary | Purchased | |
---|---|---|---|---|
0 | France | 44.0 | 72000.0 | Yes |
1 | Spain | 27.0 | 48000.0 | Yes |
3 | Spain | 38.0 | 61000.0 | No |
5 | France | 35.0 | 58000.0 | Yes |
7 | France | 48.0 | 79000.0 | Yes |
8 | Germany | 50.0 | 83000.0 | No |
9 | France | 37.0 | 67000.0 | Yes |
2. Imputing Missing Values
In [42]:
# Fill the missing entries of the two numeric columns (positions 1 and 2:
# Age and Salary) and write the result back into df.
# NOTE: every observed Age and Salary value is distinct, so 'most_frequent'
# has no real mode to choose; SimpleImputer then falls back to the smallest
# value in each column (27.0 and 48000.0 for this data). Swap in
# strategy='mean' to fill with the column average instead.
numeric_values = df.iloc[:, 1:3].values
imputer = SimpleImputer(strategy='most_frequent', missing_values=np.nan)
df.iloc[:, 1:3] = imputer.fit(numeric_values).transform(numeric_values)
df
Out[42]:
Country | Age | Salary | Purchased | |
---|---|---|---|---|
0 | France | 44.0 | 72000.0 | Yes |
1 | Spain | 27.0 | 48000.0 | Yes |
2 | NaN | 30.0 | 54000.0 | NaN |
3 | Spain | 38.0 | 61000.0 | No |
4 | Germany | 40.0 | 48000.0 | Yes |
5 | France | 35.0 | 58000.0 | Yes |
6 | Spain | 27.0 | 52000.0 | No |
7 | France | 48.0 | 79000.0 | Yes |
8 | Germany | 50.0 | 83000.0 | No |
9 | France | 37.0 | 67000.0 | Yes |
In [43]:
# Refit the same imputer on the Age/Salary columns. NOTE(review): these columns
# were already imputed by the previous cell and contain no NaN at this point,
# so this repeats that step; the cell's output is just the estimator's repr.
imputer.fit(df.iloc[:,1:3].values)
Out[43]:
SimpleImputer(strategy='most_frequent')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SimpleImputer(strategy='most_frequent')
In [44]:
# Apply the fitted imputer to the Age/Salary columns and keep the resulting array.
# With no missing values left in these columns, x should equal their current contents.
x = imputer.transform(df.iloc[:,1:3].values)
In [45]:
# Write the imputed array back into the Age and Salary columns (positions 1 and 2).
df.iloc[:,1:3]=x
In [46]:
# Show the frame after imputation: Age and Salary are complete, while Country and
# Purchased still hold NaN in row 2 because only columns 1:3 were imputed.
df
Out[46]:
Country | Age | Salary | Purchased | |
---|---|---|---|---|
0 | France | 44.0 | 72000.0 | Yes |
1 | Spain | 27.0 | 48000.0 | Yes |
2 | NaN | 30.0 | 54000.0 | NaN |
3 | Spain | 38.0 | 61000.0 | No |
4 | Germany | 40.0 | 48000.0 | Yes |
5 | France | 35.0 | 58000.0 | Yes |
6 | Spain | 27.0 | 52000.0 | No |
7 | France | 48.0 | 79000.0 | Yes |
8 | Germany | 50.0 | 83000.0 | No |
9 | France | 37.0 | 67000.0 | Yes |
In [ ]: