Commit 50acdff6 authored by Leonie Pick

New plots for decision boundary and calibration curve

parent 32773479
@@ -30,4 +30,5 @@ from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.calibration import calibration_curve
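For context, a minimal sketch of how the newly imported calibration_curve is typically used (the labels and probabilities below are illustrative, not from this repository): it bins the predicted probabilities and returns, per bin, the observed fraction of positives and the mean predicted probability.

import numpy as np
from sklearn.calibration import calibration_curve

# Illustrative data: true binary labels and predicted P(class = 1)
y_true = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 1])
y_prob = np.array([0.1, 0.2, 0.8, 0.7, 0.9, 0.3, 0.6, 0.4, 0.95, 0.55])

# prob_true: fraction of positives per bin; prob_pred: mean predicted probability per bin
prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=5)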
@@ -709,7 +709,7 @@ def Get_MyScores(CM_means, n_classes):
HSS = 2*(TP*TN-FN*FP)/((TP+FN)*(FN+TN)+(TP+FP)*(FP+TN))
MCC = (TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
BM = TPR+TNR-1; MK = PPV+NPV-1
BM = TPR+TNR-1; MK = PPV+NPV-1
Scores = [TPR,FNR,FPR,TNR,PPV,FDR,FOR,NPV,Prevalence,ACC,LRp,LRn,DOR,F1a,F1b,FB,HSS,MCC,BM,MK]
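A quick sanity check of the MCC expression above against scikit-learn, using toy counts (TP=3, TN=4, FP=1, FN=2; purely illustrative):

import numpy as np
from sklearn import metrics

y_true = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
y_pred = np.array([1, 1, 1, 0, 0, 1, 0, 0, 0, 0])
TN, FP, FN, TP = metrics.confusion_matrix(y_true, y_pred).ravel()

# Same formula as in Get_MyScores
MCC = (TP*TN - FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
assert np.isclose(MCC, metrics.matthews_corrcoef(y_true, y_pred))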
@@ -718,15 +718,19 @@ def Get_MyScores(CM_means, n_classes):
###
def Assess_Model(Data,Target,N2,K2,modelTest,n_classes,Plot,Save,SaveName):
## ROC, Precision-Recall curves, confusion matrix
curve_i = np.linspace(0,1,100)
#curve_i = np.linspace(0,1,101)
# ROC, Precision-Recall curves, confusion matrix
#curve_i = np.linspace(0,1,100)
thresholds = np.linspace(0,1,101)
Curves = np.zeros((N2,K2,2,100))
CM = np.zeros((N2,K2,n_classes**2,2))
## Performance scores
Scorers = np.zeros((N2,K2,22,2))
#Curves = np.zeros((N2,K2,2,100))
CM = np.zeros((N2,K2,n_classes**2,2,len(thresholds)))
# Performance scores
Scorers = np.zeros((N2,K2,22,2,len(thresholds)))
# Calibration
n_bins = 11
Calib = np.zeros((N2,K2,3,n_bins-1))
for i in range(N2):
CV2 = StratifiedKFold(n_splits=K2, shuffle=True, random_state=i)
@@ -745,49 +749,87 @@ def Assess_Model(Data,Target,N2,K2,modelTest,n_classes,Plot,Save,SaveName):
proba_train = modelTest.predict_proba(D_train2) # train score
pred_train = modelTest.predict(D_train2)
## Precision-Recall
precision_curve0, recall_curve0, thresPR0 = metrics.precision_recall_curve(T_test2,proba_test[:,1],pos_label=1)
Scorers[i,ii,0,0] = metrics.auc(recall_curve0,precision_curve0)
precision_curve1, recall_curve1, thresPR1 = metrics.precision_recall_curve(T_train2,proba_train[:,1],pos_label=1)
Scorers[i,ii,0,1] = metrics.auc(recall_curve1,precision_curve1)
recall_curve, index_u = np.unique(recall_curve0,return_index=True); precision_curve = precision_curve0[index_u]
precision_i = interp(curve_i,recall_curve,precision_curve)
## ROC curve
fpr0,tpr0,thresROC0 = metrics.roc_curve(T_test2,proba_test[:,1],pos_label=1)
Scorers[i,ii,1,0] = metrics.auc(fpr0,tpr0)
fpr1,tpr1,thresROC1 = metrics.roc_curve(T_train2,proba_train[:,1],pos_label=1)
Scorers[i,ii,1,1] = metrics.auc(fpr1,tpr1)
tpi_i = interp(curve_i,fpr0,tpr0)
Curves[i,ii,0,:] = precision_i; Curves[i,ii,1,:] = tpi_i
## NEW
for j in range(len(thresholds)):
class1_test = np.where(proba_test[:,1] > thresholds[j])[0]
pred_test_thres = np.zeros(len(pred_test))
pred_test_thres[class1_test] = 1
class1_train = np.where(proba_train[:,1] > thresholds[j])[0]
pred_train_thres = np.zeros(len(pred_train))
pred_train_thres[class1_train] = 1
#CM_thres = metrics.confusion_matrix(T_test2, pred_test_thres, labels=range(n_classes)).ravel()
#P = CM_thres[3]+CM_thres[2]; PP = CM_thres[3]+CM_thres[1]; N = CM_thres[0]+CM_thres[1]
#Scores_thres[i,ii,0,j] = CM_thres[3]/P #TPR
#Scores_thres[i,ii,1,j] = CM_thres[1]/N #FPR
#Scores_thres[i,ii,2,j] = CM_thres[3]/PP #PPV
###
# ## Precision-Recall
# precision_curve0, recall_curve0, thresPR0 = metrics.precision_recall_curve(T_test2,proba_test[:,1],pos_label=1)
# Scorers[i,ii,0,0] = metrics.auc(recall_curve0,precision_curve0)
# precision_curve1, recall_curve1, thresPR1 = metrics.precision_recall_curve(T_train2,proba_train[:,1],pos_label=1)
# Scorers[i,ii,0,1] = metrics.auc(recall_curve1,precision_curve1)
# recall_curve, index_u = np.unique(recall_curve0,return_index=True); precision_curve = precision_curve0[index_u]
# precision_i = interp(curve_i,recall_curve,precision_curve)
# ## ROC curve
# fpr0,tpr0,thresROC0 = metrics.roc_curve(T_test2,proba_test[:,1],pos_label=1,drop_intermediate=False)
# Scorers[i,ii,1,0] = metrics.auc(fpr0,tpr0)
# fpr1,tpr1,thresROC1 = metrics.roc_curve(T_train2,proba_train[:,1],pos_label=1)
# Scorers[i,ii,1,1] = metrics.auc(fpr1,tpr1)
# tpi_i = interp(curve_i,fpr0,tpr0)
# Curves[i,ii,0,:] = precision_i; Curves[i,ii,1,:] = tpi_i
## Confusion matrix
CM[i,ii,:,0] = metrics.confusion_matrix(T_test2, pred_test, labels=range(n_classes)).ravel()
CM[i,ii,:,1] = metrics.confusion_matrix(T_train2, pred_train, labels=range(n_classes)).ravel()
## Confusion matrix
CM[i,ii,:,0,j] = metrics.confusion_matrix(T_test2, pred_test_thres, labels=range(n_classes)).ravel()
CM[i,ii,:,1,j] = metrics.confusion_matrix(T_train2, pred_train_thres, labels=range(n_classes)).ravel()
## Get scorers
C, Scorers[i,ii,2:,0] = Get_MyScores(CM[i,ii,:,0],n_classes)
C, Scorers[i,ii,2:,1] = Get_MyScores(CM[i,ii,:,1],n_classes)
## Get scorers
C_tmp, Scorers[i,ii,2:,0,j] = Get_MyScores(CM[i,ii,:,0,j],n_classes)
C_tmp, Scorers[i,ii,2:,1,j] = Get_MyScores(CM[i,ii,:,1,j],n_classes)
if n_classes > 2:
Scorers[i,ii,19,0,j] = metrics.matthews_corrcoef(T_test2,pred_test_thres)
Scorers[i,ii,19,1,j] = metrics.matthews_corrcoef(T_train2,pred_train_thres)
# Calibration curve
Calib[i,ii,0,:], Calib[i,ii,1,:] = calibration_curve(T_test2, proba_test[:,1], normalize=False, n_bins=n_bins-1) # n_bins-1 = 10 bins, matching Calib's last axis and the histogram below
Calib[i,ii,2,:], bin_edges = np.histogram(proba_test[:,1], bins=np.arange(0,1.1,0.1))
# AUC scores
Scorers[i,ii,6,0,np.isnan(Scorers[i,ii,6,0,:])] = 1; Scorers[i,ii,6,1,np.isnan(Scorers[i,ii,6,1,:])] = 1
Scorers[i,ii,0,0,:] = metrics.auc(Scorers[i,ii,4,0,:],Scorers[i,ii,2,0,:]); Scorers[i,ii,0,1,:] = metrics.auc(Scorers[i,ii,4,1,:],Scorers[i,ii,2,1,:]) #ROC
Scorers[i,ii,1,0,:] = metrics.auc(Scorers[i,ii,2,0,:],Scorers[i,ii,6,0,:]); Scorers[i,ii,1,1,:] = metrics.auc(Scorers[i,ii,2,1,:],Scorers[i,ii,6,1,:]) #P-R
if n_classes > 2:
Scorers[i,ii,19,0] = metrics.matthews_corrcoef(T_test2,pred_test)
Scorers[i,ii,19,0] = metrics.matthews_corrcoef(T_train2,pred_train)
ii += 1
## Scores
Scorers_means = np.mean(Scorers,axis=1)
Model_Mean = np.around(np.mean(Scorers_means,axis=0),4)
Model_Std = np.around(np.std(Scorers_means,axis=0),4)
Scorers_means = np.nanmean(Scorers,axis=1)
Model_Mean = np.around(np.nanmean(Scorers_means,axis=0),4)
Model_Std = np.around(np.nanstd(Scorers_means,axis=0),4)
## Sum confusion matrix
CM_means_i = np.sum(CM,axis=1); CM_means = np.around(np.mean(CM_means_i,axis=0),0)
C, Scores = Get_MyScores(CM_means[:,0], n_classes)
## Decide on decision boundary
print('best MCC:',np.nanmax(Model_Mean[19,0,:]))
print('best decision test:',thresholds[np.nanargmax(Model_Mean[19,0,:])])
print('best decision train:',thresholds[np.nanargmax(Model_Mean[19,1,:])])
decision_boundary = np.nanargmax(Model_Mean[19,0,:]) # index of the MCC-optimal threshold in thresholds
C, Scores = Get_MyScores(CM_means[:,0,decision_boundary], n_classes)
# Plot mean confusion matrix and curves
if Plot == True:
pl.CM(CM_means[:,0],C,n_classes,Save,SaveName)
pl.Curves(N2,K2,Curves,curve_i,Model_Mean[2:7,0],Model_Mean[0:2,0],Model_Std[0:2,0],C,Save,SaveName)
return Model_Mean, Model_Std
pl.CM(CM_means[:,0,decision_boundary],C,n_classes,Save,SaveName)
##pl.Curves(N2,K2,Curves,curve_i,Model_Mean[2:7,0],Model_Mean[0:2,0],Model_Std[0:2,0],C,Save,SaveName)
pl.Curves(N2,K2,Scorers,decision_boundary,Model_Mean[0:2,0,:],Model_Std[0:2,0,:],C,Save,SaveName)
pl.Decision(thresholds, Model_Mean[:,0,:], Model_Std[:,0,:],Save,SaveName)
pl.Calibration(Calib,bin_edges,Save,SaveName)
return Model_Mean[:,:,decision_boundary], Model_Std[:,:,decision_boundary]
###
###
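As a hedged sketch of the threshold sweep that Assess_Model now performs (the function and variable names here are illustrative, not the module's): class-1 probabilities are compared against each candidate threshold, MCC is computed per threshold, and the boundary with the largest MCC is kept as the decision boundary.

import numpy as np
from sklearn import metrics

def pick_decision_boundary(y_true, proba_pos, thresholds=np.linspace(0, 1, 101)):
    # MCC for every candidate threshold; the argmax defines the decision boundary.
    mcc = np.full(len(thresholds), np.nan)
    for j, t in enumerate(thresholds):
        pred = (proba_pos > t).astype(int)   # label 1 where P(class = 1) exceeds the threshold
        mcc[j] = metrics.matthews_corrcoef(y_true, pred)
    best = np.nanargmax(mcc)
    return thresholds[best], mcc[best]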
@@ -389,7 +389,8 @@ def CM(CM,Condition,n_classes,Save,SaveName):
plt.show()
###
###
def Curves(N2,K2,Curves,curve_i,Scorers,Model_Mean,Model_Std,C,Save,SaveName):
#def Curves(N2,K2,Curves,curve_i,Scorers,Model_Mean,Model_Std,C,Save,SaveName):
def Curves(N2,K2,Scorers,decision,Model_Mean,Model_Std,C,Save,SaveName):
fig, axs = plt.subplots(1,2,facecolor='w',edgecolor='k',sharey=True)
fig.set_size_inches([14,6], forward=True)
@@ -398,8 +399,8 @@ def Curves(N2,K2,Curves,curve_i,Scorers,Model_Mean,Model_Std,C,Save,SaveName):
axs[0].axis('equal')
axs[1].axis('equal')
precision = Curves[:,:,0,:]
tpr = Curves[:,:,1,:]
#precision = Curves[:,:,0,:]
#tpr = Curves[:,:,1,:]
for i in range(N2):
for ii in range(K2):
@@ -407,27 +408,37 @@ def Curves(N2,K2,Curves,curve_i,Scorers,Model_Mean,Model_Std,C,Save,SaveName):
#tpr[i,ii,0] = 0.0; tpr[i,ii,-1] = 1.0
#precision[i,ii,0] = 1.0; precision[i,ii,-1] = 0.0
if i== 0 and ii == 0:
axs[0].plot(curve_i,tpr[i,ii,:],color='gray',label='1 fold',linewidth=0.5,zorder=0)
#axs[0].plot(curve_i,tpr[i,ii,:],color='gray',label='1 fold',linewidth=0.5,zorder=0)
axs[0].plot(Scorers[i,ii,4,0,:],Scorers[i,ii,2,0,:],color='gray',label='1 fold',linewidth=0.5,zorder=0)
else:
axs[0].plot(curve_i,tpr[i,ii,:],color='gray',linewidth=0.5,zorder=0)
axs[1].plot(curve_i,precision[i,ii,:],color='gray',linewidth=0.5,zorder=0)
ROC_inner = np.mean(tpr[i,:,:],axis=0)
PR_inner = np.mean(precision[i,:,:],axis=0)
#axs[0].plot(curve_i,tpr[i,ii,:],color='gray',linewidth=0.5,zorder=0)
axs[0].plot(Scorers[i,ii,4,0,:],Scorers[i,ii,2,0,:],color='gray',linewidth=0.5,zorder=0)
#axs[1].plot(curve_i,precision[i,ii,:],color='gray',linewidth=0.5,zorder=0)
axs[1].plot(Scorers[i,ii,2,0,:],Scorers[i,ii,6,0,:],color='gray',linewidth=0.5,zorder=0)
Scorers_inner = np.nanmean(Scorers[i,:,:,:,:],axis=0)
#ROC_inner = np.mean(tpr[i,:,:],axis=0)
#PR_inner = np.mean(precision[i,:,:],axis=0)
if i == 0:
axs[0].plot(curve_i,ROC_inner,color='black',label='Split mean (4 folds)')
#axs[0].plot(curve_i,ROC_inner,color='black',label='Split mean (4 folds)')
axs[0].plot(Scorers_inner[4,0,:],Scorers_inner[2,0,:],color='black',label='Split mean (4 folds)')
else:
axs[0].plot(curve_i,ROC_inner,color='black',zorder=1)
axs[1].plot(curve_i,PR_inner,color='black',zorder=1)
#axs[0].plot(curve_i,ROC_inner,color='black',zorder=1)
axs[0].plot(Scorers_inner[4,0,:],Scorers_inner[2,0,:],color='black',zorder=1)
#axs[1].plot(curve_i,PR_inner,color='black',zorder=1)
axs[1].plot(Scorers_inner[2,0,:],Scorers_inner[6,0,:],color='black',zorder=1)
ROC_inner = np.nanmean(tpr,axis=1); ROC_outer = np.nanmean(ROC_inner,axis=0)
PR_inner = np.nanmean(precision,axis=1); PR_outer = np.nanmean(PR_inner,axis=0)
#ROC_inner = np.nanmean(tpr,axis=1); ROC_outer = np.nanmean(ROC_inner,axis=0)
#PR_inner = np.nanmean(precision,axis=1); PR_outer = np.nanmean(PR_inner,axis=0)
Scorers_inner = np.nanmean(Scorers,axis=1); Scorers_outer = np.nanmean(Scorers_inner,axis=0)
axs[0].plot(curve_i,ROC_outer,color='maroon',label='Total mean',zorder=2)
axs[1].plot(curve_i,PR_outer,color='maroon',label='Total mean',zorder=2)
#axs[0].plot(curve_i,ROC_outer,color='maroon',label='Total mean',zorder=2)
#axs[1].plot(curve_i,PR_outer,color='maroon',label='Total mean',zorder=2)
axs[0].plot(Scorers_outer[4,0,:],Scorers_outer[2,0,:],color='maroon',label='Total mean',zorder=2)
axs[1].plot(Scorers_outer[2,0,:],Scorers_outer[6,0,:],color='maroon',label='Total mean',zorder=2)
axs[0].scatter(Scorers[2],Scorers[0],s=40,color='blue',label='Decision boundary',zorder=3)
axs[1].scatter(Scorers[0],Scorers[4],s=40,color='blue',label='Decision boundary',zorder=3)
axs[0].scatter(Scorers_outer[4,0,decision],Scorers_outer[2,0,decision],s=60,color='blue',label='Decision boundary',zorder=3)
axs[1].scatter(Scorers_outer[2,0,decision],Scorers_outer[6,0,decision],s=60,color='blue',label='Decision boundary',zorder=3)
P = C[1,0]; PP = C[1,1]; N = C[0,0]; PN = C[0,1]; POP = sum(C[:,0])
#P,PP,N,PN = C
@@ -439,16 +450,16 @@ def Curves(N2,K2,Curves,curve_i,Scorers,Model_Mean,Model_Std,C,Save,SaveName):
axs[0].set_ylabel(r'TPR = TP/P', fontsize=18); axs[1].set_ylabel(r'PPV = TP/PP', fontsize=18)
axs[0].legend(loc=0, frameon=False, fontsize=16); axs[1].legend(loc=0, frameon=False, fontsize=16)
axs[0].set_title('ROC curve',fontsize=18)
axs[0].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[1],3))+'$\pm$'+str(np.around(Model_Std[1],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[0].transAxes)
axs[0].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[0].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[0].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[0,decision],3))+'$\pm$'+str(np.around(Model_Std[0,decision],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[0].transAxes)
axs[1].set_title('Precision-Recall curve',fontsize=18)
axs[1].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[0],3))+'$\pm$'+str(np.around(Model_Std[0],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[1].transAxes)
axs[1].yaxis.set_label_position('right'); axs[1].yaxis.set_ticks_position('right')
axs[1].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[1].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[0].set_xlim([0,1]);axs[0].set_ylim([0,1.03]);axs[1].set_xlim([0,1]);axs[1].set_ylim([0,1.03])
axs[1].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[1,decision],3))+'$\pm$'+str(np.around(Model_Std[1,decision],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[1].transAxes)
if Save == True:
fig.savefig('./Dump/Fig/development/Curves_'+SaveName+'.pdf',format='pdf',dpi=300,transparent=True)
@@ -457,6 +468,79 @@ def Curves(N2,K2,Curves,curve_i,Scorers,Model_Mean,Model_Std,C,Save,SaveName):
plt.show()
###
###
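For reference, a small self-contained sketch of the idea behind the rewritten Curves plot (illustrative names only): ROC points are taken from the same threshold sweep, one TPR/FPR pair per threshold, and the AUC is integrated over the swept points; sklearn.metrics.auc accepts a monotonically decreasing x.

import numpy as np
from sklearn import metrics

def roc_from_sweep(y_true, proba_pos, thresholds=np.linspace(0, 1, 101)):
    # One confusion matrix per threshold, then TPR/FPR and the area under the swept curve.
    tpr = np.empty(len(thresholds)); fpr = np.empty(len(thresholds))
    for j, t in enumerate(thresholds):
        pred = (proba_pos > t).astype(int)
        tn, fp, fn, tp = metrics.confusion_matrix(y_true, pred, labels=[0, 1]).ravel()
        tpr[j] = tp/(tp + fn); fpr[j] = fp/(fp + tn)
    return fpr, tpr, metrics.auc(fpr, tpr)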
def Decision(thresholds,Model_Mean,Model_Std,Save,SaveName):
fig, axs = plt.subplots(1,1,facecolor='w',edgecolor='k',sharey=True)
fig.set_size_inches([6.5,6], forward=True)
fig.subplots_adjust(bottom=0.1,top=0.95,left=0.2,right=0.9,wspace=0.0,hspace=0.0)
axs.axis('equal')
scoreID = [11,18,19,20,21]
scoreLabel = ['ACC','HSS','MCC','J','Deltap']
linewidth=[.5,.5,1.0,.5,.5]
zorder=[0,0,1,0,0]
colors=['midnightblue','orange','maroon','purple','black']
for i in range(len(scoreID)):
#axs[0].plot(thresholds, Model_Mean[scoreID[i],:],label=scoreLabel[i])
axs.errorbar(thresholds, Model_Mean[scoreID[i],:], yerr=3*Model_Std[scoreID[i],:], fmt='none',elinewidth=linewidth[i], ecolor=colors[i], zorder=0)
axs.plot(thresholds, Model_Mean[scoreID[i],:], linewidth=linewidth[i], color=colors[i], zorder=zorder[i], label=scoreLabel[i]+r'$\pm$3$\cdot$std')
axs.set_xlim([0,1]); axs.set_ylim([0,1])
axs.set_xticks(np.linspace(0,1,11))
axs.set_xticks(np.linspace(0,1,21),minor=True)
axs.set_yticks(np.linspace(0,1,11))
axs.set_yticks(np.linspace(0,1,21),minor=True)
axs.set_ylabel('Score',fontsize=18)
axs.set_xlabel('Decision boundary',fontsize=18)
axs.legend(loc=0,frameon=False,fontsize=16)
axs.tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs.tick_params(axis ='y',which='both',direction='inout',labelsize=16)
if Save == True:
fig.savefig('./Dump/Fig/development/Decision_'+SaveName+'.pdf',format='pdf',dpi=300,transparent=True)
fig.savefig('./Dump/Fig/development/Decision_'+SaveName+'.png',format='png',dpi=300,transparent=True)
plt.show()
###
###
def Calibration(Calib,bin_edges,Save,SaveName):
fig, axs = plt.subplots(2,1,facecolor='w',edgecolor='k',sharex=True)
fig.set_size_inches([6.5,10], forward=True)
fig.subplots_adjust(bottom=0.1,top=0.95,left=0.2,right=0.9,wspace=0.0,hspace=0.02)
axs = axs.ravel()
axs[0].axis('equal')
Calib_inner = np.nanmean(Calib,axis=1)
Calib_outer = np.nanmean(Calib_inner,axis=0)
Calib_outer_std = np.nanstd(Calib_inner,axis=0)
axs[0].plot(np.linspace(0,1,11),np.linspace(0,1,11),linestyle='--',color='gray',label=r'perfectly calibrated')
axs[0].errorbar(Calib_outer[1,:],Calib_outer[0,:],xerr=3*Calib_outer_std[1,:],yerr=3*Calib_outer_std[0,:],ecolor='black')
axs[1].bar(bin_edges[0:-1],height=Calib_outer[2,:],width=bin_edges[1]-bin_edges[0],align='edge',yerr=3*Calib_outer_std[2,:],color='darkgray',edgecolor='black',linewidth=0.5)
axs[0].set_ylim([-0.2,1.2])
axs[0].set_xticks(np.linspace(0,1,11)[::2])
axs[0].set_xticks(np.linspace(0,1,11),minor=True)
axs[0].set_ylabel('Fraction of positives',fontsize=18)
axs[0].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[0].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[0].legend(loc=0,frameon=False,fontsize=16)
axs[0].set_xticklabels([])
#axs[1].set_xlim([0,1])
axs[1].set_ylabel('Count',fontsize=18)
axs[1].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[1].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[1].set_xticks(np.linspace(0,1,11)[::2])
axs[1].set_xticks(np.linspace(0,1,11),minor=True)
axs[1].set_xticklabels(['0.0','0.2','0.4','0.6','0.8','1.0'])
axs[1].set_xlabel('Mean predicted probability',fontsize=18)
if Save == True:
fig.savefig('./Dump/Fig/development/Calibration_'+SaveName+'.pdf',format='pdf',dpi=300,transparent=True)
fig.savefig('./Dump/Fig/development/Calibration_'+SaveName+'.png',format='png',dpi=300,transparent=True)
plt.show()
###
###
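And a compact sketch of the quantities behind the new Calibration figure (the helper name and arguments are assumptions, not the repository's API): the reliability curve from calibration_curve plus a histogram of the predicted probabilities over matching bins.

import numpy as np
from sklearn.calibration import calibration_curve

def calibration_data(y_true, proba_pos, n_bins=10):
    # Observed positive fraction and mean predicted probability per bin, plus bin counts.
    prob_true, prob_pred = calibration_curve(y_true, proba_pos, n_bins=n_bins)
    counts, bin_edges = np.histogram(proba_pos, bins=np.linspace(0, 1, n_bins + 1))
    return prob_true, prob_pred, counts, bin_edges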
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Reference,Save):
fig, axs = plt.subplots(1,4,facecolor='w', edgecolor='k',sharey=True)