# This article is 1,066 characters long; estimated reading time is about 3 minutes.
import pandas as pd
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import seaborn as sns

# Load the HR data set; the path is relative to the working directory.
df = pd.read_csv("./data/HR.csv")

# Dict mapping each department name to the array of positional row indices
# that belong to that department.
dp_indices = df.groupby("department").indices

# Slice the "left" column once per department so the pairwise loop below
# does not repeat the same .iloc lookup for every (i, j) pair.
left_by_dept = {
    dept: df["left"].iloc[rows].values for dept, rows in dp_indices.items()
}

# Single comparison: p-value of an independent two-sample t-test between
# the "sales" and "technical" departments.
sales_values = left_by_dept["sales"]
technical_values = left_by_dept["technical"]
print(ss.ttest_ind(sales_values, technical_values)[1])

# Full pairwise matrix: dp_t_mat[i, j] holds the t-test p-value between
# department i and department j (rows and columns follow dp_keys order).
dp_keys = list(dp_indices.keys())
dp_t_mat = np.zeros([len(dp_keys), len(dp_keys)])
for i, row_dept in enumerate(dp_keys):
    for j, col_dept in enumerate(dp_keys):
        dp_t_mat[i, j] = ss.ttest_ind(
            left_by_dept[row_dept], left_by_dept[col_dept]
        )[1]

# Show the p-value matrix as a heatmap labelled with department names.
sns.heatmap(dp_t_mat, xticklabels=dp_keys, yticklabels=dp_keys)
plt.show()
# Mean of the "left" column for every combination of
# (promotion_last_5years, salary) on the rows and Work_accident on the columns.
# The string "mean" is used instead of np.mean: recent pandas versions warn
# when a NumPy callable is passed and recommend the string alias to keep the
# same result.
piv_tb = pd.pivot_table(
    df,
    values="left",
    index=["promotion_last_5years", "salary"],
    columns=["Work_accident"],
    aggfunc="mean",
)
print(piv_tb)

# Colour scale is pinned to [0, 1].
# NOTE(review): this presumes "left" is a 0/1 flag so the means are rates - confirm.
sns.heatmap(piv_tb, vmin=0, vmax=1)
plt.show()
# Reposted from: http://zdjka.baihongyu.com/