import pandas as pd
from scipy.stats import spearmanr
import numpy as np
import warnings #Used primarily to ignore warnings
# Silence all warnings from this point on.
warnings.filterwarnings("ignore")

# Load the training table from the working directory.
df = pd.read_csv('train.csv')

# Rank-correlate every column except "ID" and "target" against the target.
# `labels` and `values` are filled in lockstep: one entry per kept column.
labels = []
values = []
for col in df.columns:
    if col in ("ID", "target"):
        continue
    labels.append(col)
    # Element [0] of spearmanr's result is the correlation coefficient.
    coefficient = spearmanr(df[col].values, df['target'].values)[0]
    values.append(coefficient)

# Tabulate the coefficients, ordered from most negative to most positive.
correlation_df = pd.DataFrame(
    {'column_label': labels, 'correlation_val': values}
).sort_values(by='correlation_val')

# Keep only rows whose coefficient is strictly outside [-0.1, 0.1].
coefficients = correlation_df['correlation_val']
correlation_df = correlation_df[(coefficients < -0.1) | (coefficients > 0.1)]

# Bare expression: presumably left so a notebook renders the table.
correlation_df
|
column_label |
correlation_val |
216 |
77eb013ca |
-0.116095 |
1908 |
a60027bb4 |
-0.115835 |
1378 |
3adf5e2b5 |
-0.114185 |
220 |
186b87c05 |
-0.113428 |
2232 |
f8b733d3f |
-0.113011 |
2158 |
715fa74a4 |
-0.112752 |
2471 |
08af3dd45 |
-0.112729 |
3595 |
7b1ddbabf |
-0.112540 |
2102 |
adadb9a96 |
-0.112109 |
2870 |
8485abcab |
-0.111304 |
4852 |
c7ae29e66 |
-0.110687 |
3600 |
4f2f6b0b3 |
-0.110345 |
4772 |
67f9e982f |
-0.110242 |
2341 |
e7071d5e3 |
-0.109869 |
4152 |
e17f1f07c |
-0.109022 |
3275 |
f41f0eb2f |
-0.108897 |
3767 |
fbe52b1b2 |
-0.108612 |
672 |
f2520b601 |
-0.108505 |
2974 |
cd8048913 |
-0.108488 |
2574 |
2c136905e |
-0.108038 |
3602 |
e5ac02d3c |
-0.106720 |
3852 |
994b4c2ac |
-0.106573 |
3066 |
cb162bd89 |
-0.106288 |
552 |
1d79bc053 |
-0.105551 |
1392 |
dd85a900c |
-0.105311 |
757 |
08d203407 |
-0.105278 |
1129 |
cbf236577 |
-0.104954 |
1968 |
28dc3cc44 |
-0.104916 |
2320 |
a8ef2a0d2 |
-0.104837 |
3117 |
45cda25bb |
-0.104755 |
1722 |
fd9968f0d |
-0.104548 |
1238 |
89db78d8e |
-0.104448 |
4667 |
9e2040e5b |
-0.104247 |
1224 |
b6fa5a5fd |
-0.104106 |
299 |
fa6e76901 |
-0.103114 |
774 |
83e2ae51c |
-0.102465 |
651 |
e9c7ccc05 |
-0.102174 |
229 |
0c4bf4863 |
-0.101714 |
2999 |
13d853d22 |
-0.101657 |
3922 |
0eebebc7c |
-0.101501 |
332 |
707f193d9 |
-0.101385 |
1041 |
5a88e3d89 |
-0.100786 |
2425 |
ea397d576 |
-0.100696 |
2417 |
912f4f5de |
-0.100464 |
4875 |
896d1c52d |
-0.100381 |
1990 |
e2b4d4ef7 |
-0.100337 |
4178 |
06b19b6c4 |
-0.100202 |
4358 |
f190486d6 |
0.107678 |