# Load the training set from disk into a DataFrame.
df = pd.read_csv(filepath_or_buffer="train.csv")
# Preview the first five rows as a quick look at the loaded columns.
df.head(n=5)
|
ID |
target |
48df886f9 |
0deb4b6a8 |
34b15f335 |
a8cb14b00 |
2f0771a37 |
30347e683 |
d08d1fbe3 |
6ee66e115 |
... |
3ecc09859 |
9281abeea |
8675bec0b |
3a13ed79a |
f677d4d13 |
71b203550 |
137efaa80 |
fb36b89d9 |
7e293fbaf |
9fc776466 |
0 |
000d6aaf2 |
38000000.0 |
0.0 |
0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
... |
0.0 |
0.0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
000fbd867 |
600000.0 |
0.0 |
0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
... |
0.0 |
0.0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
2 |
0027d6b71 |
10000000.0 |
0.0 |
0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
... |
0.0 |
0.0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
3 |
0028cbf45 |
2000000.0 |
0.0 |
0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
... |
0.0 |
0.0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
4 |
002a68644 |
14400000.0 |
0.0 |
0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
... |
0.0 |
0.0 |
0.0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
5 rows × 4993 columns
# Build a two-column table listing every column of `df` alongside its number
# of distinct values. `nunique()` leaves nulls out of the count by default.
unique_vals = (
    df.nunique()
    .rename_axis("Column Name")        # label the index so it becomes the first column
    .reset_index(name="Uniqueness")    # move the counts into the second column
)
unique_vals.head()
|
Column Name |
Uniqueness |
0 |
ID |
4459 |
1 |
target |
1413 |
2 |
48df886f9 |
32 |
3 |
0deb4b6a8 |
5 |
4 |
34b15f335 |
29 |