import pandas as pd
from scipy.stats import spearmanr
import numpy as np
import warnings #Used primarily to ignore warnings
warnings.filterwarnings("ignore")
# Load the training table from the working directory.
df = pd.read_csv('train.csv')

# Candidate columns: everything except the "ID" and "target" columns.
labels = [name for name in df.columns if name not in ["ID", "target"]]

# Spearman rank correlation of each candidate column against 'target'.
# spearmanr returns (correlation, p-value); only the correlation is kept.
values = [
    spearmanr(df[name].values, df['target'].values)[0]
    for name in labels
]

# One row per candidate column, ordered from most negative to most positive.
correlation_df = (
    pd.DataFrame({'column_label': labels, 'correlation_val': values})
    .sort_values(by='correlation_val')
)

# Keep only rows whose correlation lies strictly outside [-0.1, 0.1].
# NaN correlations fail both comparisons and are therefore dropped.
outside_band = (
    correlation_df['correlation_val'].gt(0.1)
    | correlation_df['correlation_val'].lt(-0.1)
)
correlation_df = correlation_df[outside_band]

# Bare expression so a notebook renders the resulting table.
correlation_df
|
column_label |
correlation_val |
| 216 |
77eb013ca |
-0.116095 |
| 1908 |
a60027bb4 |
-0.115835 |
| 1378 |
3adf5e2b5 |
-0.114185 |
| 220 |
186b87c05 |
-0.113428 |
| 2232 |
f8b733d3f |
-0.113011 |
| 2158 |
715fa74a4 |
-0.112752 |
| 2471 |
08af3dd45 |
-0.112729 |
| 3595 |
7b1ddbabf |
-0.112540 |
| 2102 |
adadb9a96 |
-0.112109 |
| 2870 |
8485abcab |
-0.111304 |
| 4852 |
c7ae29e66 |
-0.110687 |
| 3600 |
4f2f6b0b3 |
-0.110345 |
| 4772 |
67f9e982f |
-0.110242 |
| 2341 |
e7071d5e3 |
-0.109869 |
| 4152 |
e17f1f07c |
-0.109022 |
| 3275 |
f41f0eb2f |
-0.108897 |
| 3767 |
fbe52b1b2 |
-0.108612 |
| 672 |
f2520b601 |
-0.108505 |
| 2974 |
cd8048913 |
-0.108488 |
| 2574 |
2c136905e |
-0.108038 |
| 3602 |
e5ac02d3c |
-0.106720 |
| 3852 |
994b4c2ac |
-0.106573 |
| 3066 |
cb162bd89 |
-0.106288 |
| 552 |
1d79bc053 |
-0.105551 |
| 1392 |
dd85a900c |
-0.105311 |
| 757 |
08d203407 |
-0.105278 |
| 1129 |
cbf236577 |
-0.104954 |
| 1968 |
28dc3cc44 |
-0.104916 |
| 2320 |
a8ef2a0d2 |
-0.104837 |
| 3117 |
45cda25bb |
-0.104755 |
| 1722 |
fd9968f0d |
-0.104548 |
| 1238 |
89db78d8e |
-0.104448 |
| 4667 |
9e2040e5b |
-0.104247 |
| 1224 |
b6fa5a5fd |
-0.104106 |
| 299 |
fa6e76901 |
-0.103114 |
| 774 |
83e2ae51c |
-0.102465 |
| 651 |
e9c7ccc05 |
-0.102174 |
| 229 |
0c4bf4863 |
-0.101714 |
| 2999 |
13d853d22 |
-0.101657 |
| 3922 |
0eebebc7c |
-0.101501 |
| 332 |
707f193d9 |
-0.101385 |
| 1041 |
5a88e3d89 |
-0.100786 |
| 2425 |
ea397d576 |
-0.100696 |
| 2417 |
912f4f5de |
-0.100464 |
| 4875 |
896d1c52d |
-0.100381 |
| 1990 |
e2b4d4ef7 |
-0.100337 |
| 4178 |
06b19b6c4 |
-0.100202 |
| 4358 |
f190486d6 |
0.107678 |