Commit cb42391f authored by Leonie Pick's avatar Leonie Pick

Fixed problem in calibration curve

parent 50acdff6
This diff is collapsed.
......@@ -31,4 +31,4 @@ from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.calibration import calibration_curve
#from sklearn.calibration import CalibratedClassifierCV
......@@ -498,7 +498,7 @@ def Get_Scorer():
return scoring
###
###
def Get_Pipelines(whichEstimator,nClasses):
def Get_Pipelines(whichEstimator,nClasses,nFeatures):
## Pre-processing
Scaler = preprocessing.StandardScaler(with_mean=True,with_std=True)
......@@ -536,7 +536,7 @@ def Get_Pipelines(whichEstimator,nClasses):
## Feature selection
FeatureSelect = RFE(Estimator, step = 1)
SelectParams = {'select__n_features_to_select':[1,2,3,4,5,6,7,8,9,10,11]}
SelectParams = {'select__n_features_to_select':np.linspace(1,nFeatures,nFeatures)}
## Pipelines
BPipe = Pipeline([('scale', Scaler),('estimate', Estimator)])
......@@ -716,7 +716,7 @@ def Get_MyScores(CM_means, n_classes):
return C, Scores
###
###
def Assess_Model(Data,Target,N2,K2,modelTest,n_classes,Plot,Save,SaveName):
def Assess_Model(Data,Target,N2,K2,modelTest0,n_classes,Plot,Save,SaveName):
# ROC, Precision-Recall curves, confusion matrix
#curve_i = np.linspace(0,1,100)
......@@ -729,25 +729,29 @@ def Assess_Model(Data,Target,N2,K2,modelTest,n_classes,Plot,Save,SaveName):
Scorers = np.zeros((N2,K2,22,2,len(thresholds)))
# Calibration
n_bins = 11
Calib = np.zeros((N2,K2,3,n_bins-1))
n_bins = 10
Calib = np.zeros((N2,K2,3,n_bins))
#modelTest0 = CalibratedClassifierCV(modelTest, method='sigmoid', cv=4)
for i in range(N2):
CV2 = StratifiedKFold(n_splits=K2, shuffle=True, random_state=i)
CV2_splits = CV2.split(Data,Target)
#modelTest0 = CalibratedClassifierCV(modelTest, method='sigmoid', cv=CV2)
ii = 0
for train2, test2 in CV2_splits:
D_train2, D_test2 = Data[train2,:], Data[test2,:]
T_train2, T_test2 = Target[train2], Target[test2]
modelTest.fit(D_train2,T_train2)
proba_test = modelTest.predict_proba(D_test2) # test score
pred_test = modelTest.predict(D_test2)
proba_train = modelTest.predict_proba(D_train2) # train score
pred_train = modelTest.predict(D_train2)
modelTest0.fit(D_train2,T_train2)
proba_test = modelTest0.predict_proba(D_test2) # test score
pred_test = modelTest0.predict(D_test2)
proba_train = modelTest0.predict_proba(D_train2) # train score
pred_train = modelTest0.predict(D_train2)
## NEW
for j in range(len(thresholds)):
......@@ -795,11 +799,19 @@ def Assess_Model(Data,Target,N2,K2,modelTest,n_classes,Plot,Save,SaveName):
Scorers[i,ii,19,0,j] = metrics.matthews_corrcoef(T_test2,pred_test_thres)
Scorers[i,ii,19,1,j] = metrics.matthews_corrcoef(T_train2,pred_train_thres)
# Calibration curve
Calib[i,ii,0,:], Calib[i,ii,1,:] = calibration_curve(T_test2, proba_test[:,1], normalize=False, n_bins=n_bins)
## Calibration curve
Calib[i,ii,2,:], bin_edges = np.histogram(proba_test[:,1], bins=np.arange(0,1.1,0.1))
# AUC scores
Prob_true, Prob_pred = calibration_curve(T_test2, proba_test[:,1], normalize=False, n_bins=n_bins)
bins_missing = np.where(Calib[i,ii,2,:]==0)[0]
if len(bins_missing) > 0:
Calib[i,ii,0,:]=np.insert(Prob_true,bins_missing,np.zeros(len(bins_missing))+np.nan)
Calib[i,ii,1,:]=np.insert(Prob_pred,bins_missing,np.zeros(len(bins_missing))+np.nan)
else:
Calib[i,ii,0,:]=Prob_true
Calib[i,ii,1,:]=Prob_pred
# Calib[i,ii,0,:], Calib[i,ii,1,:] = calibration_curve(T_test2, proba_test[:,1], normalize=False, n_bins=n_bins)
## AUC scores
Scorers[i,ii,6,0,np.isnan(Scorers[i,ii,6,0,:])] = 1; Scorers[i,ii,6,1,np.isnan(Scorers[i,ii,6,1,:])] = 1
Scorers[i,ii,0,0,:] = metrics.auc(Scorers[i,ii,4,0,:],Scorers[i,ii,2,0,:]); Scorers[i,ii,0,1,:] = metrics.auc(Scorers[i,ii,4,1,:],Scorers[i,ii,2,1,:]) #ROC
Scorers[i,ii,1,0,:] = metrics.auc(Scorers[i,ii,2,0,:],Scorers[i,ii,6,0,:]); Scorers[i,ii,1,1,:] = metrics.auc(Scorers[i,ii,2,1,:],Scorers[i,ii,6,1,:]) #P-R
......@@ -820,12 +832,12 @@ def Assess_Model(Data,Target,N2,K2,modelTest,n_classes,Plot,Save,SaveName):
print('best decision train:',thresholds[np.nanargmax(Model_Mean[19,1,:])])
decision_boundary = int(thresholds[np.nanargmax(Model_Mean[19,0,:])]*100)
#decision_boundary = 50
C, Scores = Get_MyScores(CM_means[:,0,decision_boundary], n_classes)
# Plot mean confusion matrix and curves
## Plot mean confusion matrix and curves
if Plot == True:
pl.CM(CM_means[:,0,decision_boundary],C,n_classes,Save,SaveName)
##pl.Curves(N2,K2,Curves,curve_i,Model_Mean[2:7,0],Model_Mean[0:2,0],Model_Std[0:2,0],C,Save,SaveName)
pl.Curves(N2,K2,Scorers,decision_boundary,Model_Mean[0:2,0,:],Model_Std[0:2,0,:],C,Save,SaveName)
pl.Decision(thresholds, Model_Mean[:,0,:], Model_Std[:,0,:],Save,SaveName)
pl.Calibration(Calib,bin_edges,Save,SaveName)
......
......@@ -435,10 +435,14 @@ def Curves(N2,K2,Scorers,decision,Model_Mean,Model_Std,C,Save,SaveName):
#axs[0].plot(curve_i,ROC_outer,color='maroon',label='Total mean',zorder=2)
#axs[1].plot(curve_i,PR_outer,color='maroon',label='Total mean',zorder=2)
axs[0].plot(Scorers_outer[4,0,:],Scorers_outer[2,0,:],color='maroon',label='Total mean',zorder=2)
axs[1].plot(Scorers_outer[2,0,:],Scorers_outer[6,0,:],color='maroon',label='Total mean',zorder=2)
axs[0].scatter(Scorers_outer[4,0,decision],Scorers_outer[2,0,decision],s=60,color='blue',label='Decision boundary',zorder=3)
axs[1].scatter(Scorers_outer[2,0,decision],Scorers_outer[6,0,decision],s=60,color='blue',label='Decision boundary',zorder=3)
axs[1].plot(Scorers_outer[2,0,:],Scorers_outer[6,0,:],color='maroon')#,label='Total mean',zorder=2)
axs[0].scatter(Scorers_outer[4,0,decision],Scorers_outer[2,0,decision],s=60,color='blue',label=r'$P=$'+str(np.around(decision/100,3)),zorder=3)
axs[1].scatter(Scorers_outer[2,0,decision],Scorers_outer[6,0,decision],s=60,color='blue',zorder=3)
axs[0].plot(np.linspace(0,Scorers_outer[4,0,decision],100),np.zeros(100)+Scorers_outer[2,0,decision],color='blue',linestyle='--',linewidth=0.5)
axs[0].plot(np.zeros(100)+Scorers_outer[4,0,decision],np.linspace(0,Scorers_outer[2,0,decision],100),color='blue',linestyle='--',linewidth=0.5)
axs[1].plot(np.linspace(Scorers_outer[2,0,decision],1,100),np.zeros(100)+Scorers_outer[6,0,decision],color='blue',linestyle='--',linewidth=0.5)
axs[1].plot(np.zeros(100)+Scorers_outer[2,0,decision],np.linspace(0,Scorers_outer[6,0,decision],100),color='blue',linestyle='--',linewidth=0.5)
P = C[1,0]; PP = C[1,1]; N = C[0,0]; PN = C[0,1]; POP = sum(C[:,0])
#P,PP,N,PN = C
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment