Commit eb9510fe authored by Leonie Pick

Update prior to submission.

parent c92b7cc3
@@ -7,6 +7,7 @@ import pandas as pd
import datetime as dt
import itertools
import time as time
import joblib
# IPython
from IPython.display import display
# Matplotlib
......
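The newly added joblib import is the standard scikit-learn companion for persisting fitted models; presumably that is its role here, although the hunk does not show where it is used. A minimal, hypothetical sketch (model and file name are illustrative, not taken from the notebook):

```python
import joblib
from sklearn.linear_model import LogisticRegression

# Fit a toy model, write it to disk, and restore it later.
model = LogisticRegression().fit([[0.0], [1.0]], [0, 1])
joblib.dump(model, 'model.joblib')      # illustrative file name
restored = joblib.load('model.joblib')
```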
@@ -132,12 +132,12 @@ def Search_TargetEvents(HMC, HMC11y, HMC5d, dHMC, HTime, DTime, grid, Save):
return IndexMin2
###
###
def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Time, Date, YearStart, Save):
def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Reference, Time, Date, YearStart, Save):
YearsIndex = np.where(Time[:,0] >= YearStart)[0]
TrClass = Training[:,0]
TrTimeIndex = Training[:,1]
TrFound = Training[:,2]
TrClass = Reference[:,0]
TrTimeIndex = Reference[:,1]
TrFound = Reference[:,2]
CIRs = np.where(np.logical_or(TrClass == 0, TrClass == 2))[0]
CMEs = np.where(TrClass==1)[0]
@@ -179,19 +179,19 @@ def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Ti
TrFound[CIRIndices] = 1
TrFound[CMEIndices] = 1
Training[:,2] = TrFound
Reference[:,2] = TrFound
#print(grid[IndexWin,:])
#if Save == True:
#np.save('./Dump/Events',StormsWin)
###
TargetResults = {'No. targets':len(StormsWin),'Fraction [%]':len(StormsWin)*100/len(HMC[YearsIndex])}
TargetResults = {'No. target events':len(StormsWin),'Fraction [%]':len(StormsWin)*100/len(HMC[YearsIndex])}
TrainingResults = [[len(CIRs), len(CMEs), len(CIRs)+len(CMEs)],[len(Found_CIRs),len(Found_CMEs),len(Found_CIRs)+len(Found_CMEs)]]
SelectionParams = {'HMC percentile':grid[IndexWin,0],'Scaling power':grid[IndexWin,1], 'Separation [h]':grid[IndexWin,2], 'Min. HMC drop [nT]':grid[IndexWin,3]}
SelectionParams = {r'$P_n [nT]$':grid[:,0],r'$p_{sc}$':grid[:,1], r'$\Delta t$ [h]':grid[:,2], r' $Hs$ [nT]':-grid[:,3]}
display(pd.DataFrame(data=SelectionParams,index=['Selection parameters']))
display(pd.DataFrame(data=TargetResults,index=['Selection result']))
display(pd.DataFrame(data=TrainingResults,columns=['No. CIRs','No. CMEs', 'Total'], index=['Training set', 'in Target set']))
display(pd.DataFrame(data=TrainingResults,columns=['No. CIRs','No. CMEs', 'Total'], index=['Reference set', 'Training set']))
###
pl.IndexDist(Time,YearsIndex,StormsWin,Kp_all,KpHours_all,HMC,Save)
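For orientation, this selection step picks storm events as pronounced HMC minima governed by a percentile threshold ($P_n$), a minimum separation ($\Delta t$), and a minimum drop ($Hs$); in the notebook the threshold varies in time and the parameters come from a grid search. A minimal sketch of this kind of peak picking, assuming hourly data and fixed, illustrative parameter values:

```python
import numpy as np
from scipy.signal import find_peaks

def select_events(hmc, percentile=5.0, separation_h=36, min_drop_nT=20.0):
    """Illustrative event picker: local HMC minima below a low percentile
    threshold (P_n), at least `separation_h` hours apart (Delta t) and with
    a minimum prominence (Hs)."""
    thres = np.percentile(hmc, percentile)   # low quantile of HMC [nT]
    # find_peaks detects maxima, so search the negated series:
    peaks, _ = find_peaks(-hmc, height=-thres,
                          distance=separation_h, prominence=min_drop_nT)
    return peaks

rng = np.random.default_rng(0)
hmc = -np.abs(rng.normal(20.0, 15.0, 10_000))  # synthetic hourly HMC, negative
events = select_events(hmc)
```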
@@ -324,7 +324,7 @@ def Get_F11(i,TrTimeIndex,TrClass):
return F11
###
###
def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, Save):
def Get_Features(Time, Storms, Reference, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, Save):
PeakTimes = Storms; PeakIndices = np.arange(0,len(Storms),1)
@@ -342,8 +342,8 @@ def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, S
dAsyDD = np.gradient(ASY[:,0],1,edge_order=1)
dHMC11y = np.gradient(HMC11y,1,edge_order=1)
TrTimeIndex = Training[:,1]#Training.sel(Properties='TimeIndex')
TrClass = Training[:,0]#Training.sel(Properties='Class')
TrTimeIndex = Reference[:,1]#Reference.sel(Properties='TimeIndex')
TrClass = Reference[:,0]#Reference.sel(Properties='Class')
excludeEvents = []
count1=0; count2=0
@@ -406,11 +406,11 @@ def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, S
return PeakIndices, FeatureMatrix
###
###
def Get_Diagnostics(FeatureMatrix,Storms,Training,Save):
def Get_Diagnostics(FeatureMatrix,Storms,Reference,Save):
TrClass = Training[:,0]
TrTimeIndex = Training[:,1]
TrKnown = Training[:,2]
TrClass = Reference[:,0]
TrTimeIndex = Reference[:,1]
TrKnown = Reference[:,2]
n_features = FeatureMatrix.shape[1]
n_classes = len(np.unique(TrClass))
@@ -456,7 +456,7 @@ def Get_Diagnostics(FeatureMatrix,Storms,Training,Save):
k += 1
###
display(pd.DataFrame(data=FeatureResults,index=np.linspace(1,12,12,dtype=int),columns=['Q2 CIRs','Q2 CMEs','Q2 Difference','IQR Overlap', 'Q2 Difference- IQR Overlap']))
display(pd.DataFrame(data=FeatureResults,index=np.linspace(1,12,12,dtype=int),columns=['Q2 CIRs','Q2 CMEs','Q2 Difference','IQR Overlap', 'Q2 Difference - IQR Overlap']))
###
pl.Diagnostics(n_features, n_classes, NData, Save)
......
@@ -20,8 +20,8 @@ def Selection(Time,Date,HMC,Index_thres1,StormIndices,IndexMin,IndexMin1,IndexMi
end = np.where(Time[:,4]==ends[i])[0][0]
ax[i].plot(Time[start:end,4],HMC[start:end],color='gray')
ax[i].plot(Time[start:end,4],Index_thres1[start:end],linestyle='--',color='midnightblue',label=r'HMC$_{\mathrm{thres}}\in$ ['+str(np.around(max(Index_thres1[start:end]),2))+' nT ,'+str(np.around(min(Index_thres1[start:end]),2))+' nT]')
ax[i].scatter(Time[StormIndices,4],HMC[StormIndices],color='midnightblue',s=5,zorder=3)
ax[i].plot(Time[start:end,4],Index_thres1[start:end],linestyle='--',color='midnightblue',label=r'$Hl_t\in$ ['+str(np.around(max(Index_thres1[start:end]),2))+' nT, '+str(np.around(min(Index_thres1[start:end]),2))+' nT]')
ax[i].scatter(Time[StormIndices,4],HMC[StormIndices],color='midnightblue',s=5,zorder=3)
ax[i].set_xlim([Time[start,4],Time[end,4]])
ax[i].set_ylabel('HMC [nT]')
@@ -86,8 +86,8 @@ def IndexDist(Time,YearsIndex,Storms,Kp_all,KpHours_all,HMC,Save):
HMC_step = 7.78
HMC_range = np.arange(10,480+HMC_step,HMC_step)
Index_range = Kp_range #HMC_range
step = Kp_step #HMC_step
Index_range = HMC_range #Kp_range
step = HMC_step #Kp_step
years = np.arange(1932,2016,1)
Percentages = np.zeros((len(years),len(Index_range)))
@@ -121,11 +121,11 @@ def IndexDist(Time,YearsIndex,Storms,Kp_all,KpHours_all,HMC,Save):
HMCValues = -HMC[Storms[StormsBlock]]
KpValuesAll.extend(KpValues.tolist())
Hist[l,:], bin_edges = np.histogram(KpValues, bins = Index_range)
#KpValuesAll.extend(KpValues.tolist())
#Hist[l,:], bin_edges = np.histogram(KpValues, bins = Index_range)
#KpValuesAll.extend(HMCValues.tolist())
#Hist[l,:], bin_edges = np.histogram(HMCValues, bins = Index_range)
KpValuesAll.extend(HMCValues.tolist())
Hist[l,:], bin_edges = np.histogram(HMCValues, bins = Index_range)
Hist[l,:] *= 100/len(Storms1932)
@@ -171,17 +171,17 @@ def IndexDist(Time,YearsIndex,Storms,Kp_all,KpHours_all,HMC,Save):
ax2.legend(loc=4,ncol=1,frameon=False,fontsize=10)
ax2.set_ylabel('Occurrence [%]')
ax2.set_xticklabels(np.arange(0,10,1))
ax2.set_xlabel('Kp')
ax2.set_ylim([0,10.5])
#ax2.set_xticklabels(np.arange(0,10,1))
#ax2.set_xlabel('Kp')
#ax2.set_ylim([0,10])
#ax2.set_xticklabels(np.around(Index_range[::3]/100,1))
#ax2.set_xlabel('-HMC/100 [nT]')
#ax2.set_ylim([0,45])
ax2.set_xticklabels(np.around(Index_range[::3]/100,1))
ax2.set_xlabel('-HMC/100 [nT]')
ax2.set_ylim([0,20])
if Save == True:
fig.savefig('./Dump/Fig/KpDist.pdf',format='pdf',dpi=200,transparent=True)
#fig.savefig('./Dump/Fig/KpDist.png',format='png',dpi=200,transparent=True)
fig.savefig('./Dump/Fig/HMCDist.pdf',format='pdf',dpi=200,transparent=True)
#fig.savefig('./Dump/Fig/HMCDist.png',format='png',dpi=200,transparent=True)
plt.show()
###
###
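The occurrence statistics are now binned in -HMC rather than Kp, using the bin edges defined above and normalizing counts to percent. Schematically, with synthetic stand-in values:

```python
import numpy as np

HMC_step = 7.78
HMC_range = np.arange(10, 480 + HMC_step, HMC_step)  # bin edges in nT, as above

rng = np.random.default_rng(1)
storm_values = rng.uniform(10.0, 480.0, 500)         # stand-in for -HMC at event peaks
hist, _ = np.histogram(storm_values, bins=HMC_range)
occurrence = hist * 100.0 / len(storm_values)        # occurrence in percent, cf. Hist[l,:]
```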
@@ -194,7 +194,7 @@ def Diagnostics(n_features,n_classes,NData, Save):
fig.subplots_adjust(bottom=0.08,top=0.95,left=0.05,right=0.98,wspace=0.2,hspace=0.2)
ax = ax.ravel()
fNames = [r'$\Delta$HMC', r'dHMC/dt,r', r'$\Delta$dHMC/dt', r'r$\left(\hat{Z}_6,\hat{Z}_{18}\right)$', r'Range r$\left(\hat{Z}_{6\mathrm{ lag}}\mathrm{, }\hat{Z}_{18}\right)$', r'MAD$\left(d\hat{Z}_6/dt\right)$', r'arg max $Z_{\mathrm{MLT}}$', r'max$\left(ASY_{max}\right)$', 'FWHM $ASY_{\mathrm{DD}}$', 'Solar cycle phase', 'Recurrence', 'Ideal (synthetic)']
fNames = [r'$k$=1',r'$k$=9',r'$k$=11',r'$k$=2',r'$k$=10',r'$k$=3',r'$k$=6',r'$k$=7',r'$k$=8',r'$k$=4',r'$k$=5',r'$k$=12']
All = sum(len(NData[j]) for j in range(n_classes))
if n_classes == 2:
@@ -224,7 +224,7 @@ def Diagnostics(n_features,n_classes,NData, Save):
ax[k].set_xlabel('Standardized feature value', fontsize=18)
#if i == n_features-1: ax[i].text(0.26,1.02,'Synthetic',fontsize=8,transform=ax[i].transAxes)
#else:
ax[k].text(0,1.02,str(k+1)+': '+fNames[i],fontsize=18,transform=ax[k].transAxes)
ax[k].text(0,1.02,fNames[i],fontsize=18,transform=ax[k].transAxes)
ax[k].set_xlim([0,1]); ax[k].set_xticks([0,0.25,0.5,0.75,1.0])
ax[k].tick_params(axis = 'y', which='both',direction = 'in',labelsize=16)
@@ -269,7 +269,7 @@ def Ranking(Scores, Std, Data, Ranking, nfeaturesFinal, estimators, scoring_name
axu.tick_params(axis ='x',which='both',direction='inout',labelsize=16)
if est == 0:
ax[est].set_xlabel('Number of features',fontsize=18)
ax[est].set_xlabel(r'Number of features ($N_f$)',fontsize=18)
ax[est].legend(loc=4,ncol=1,frameon=False,fontsize=18,scatterpoints=1)
ax[est].set_ylabel(scoring_name,fontsize=18)
axu.set_xlabel(r'Feature IDs ( $\rightarrow$ cumulative)',fontsize=18)
@@ -331,8 +331,8 @@ def ModelCounts(nModels,SumCounts,CV2_means,N1,K1,N2,K2,Param_Comb,BestID,scorin
def CM(CM,Condition,n_classes,Save):
CM = np.reshape(CM,(n_classes,n_classes))
#CM_names = ['TNR','FPR','FNR','TPR']
CM_names = ['NPV','FDR','FOR','PPV']
CM_names = ['TNR','FPR','FNR','TPR']
#CM_names = ['NPV','FDR','FOR','PPV']
CM_names = np.asarray(CM_names).reshape((2,2))
classes = range(n_classes)
@@ -343,8 +343,8 @@ def CM(CM,Condition,n_classes,Save):
#Normalization
for i in range(n_classes):
#CM[i,:] /= Condition[i,0]
CM[:,i] /= Condition[i,1]
CM[i,:] /= Condition[i,0] #row-normalization
#CM[:,i] /= Condition[i,1] #column-normalization
class_names.append(np.str(i))
fig, ax = plt.subplots(1,1,facecolor='w',edgecolor='k',sharey=True)
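The normalization now runs over rows (true classes), so the matrix entries become the rates TNR/FPR/FNR/TPR, instead of over columns (predicted classes), which yields the predictive values NPV/FDR/FOR/PPV. A self-contained sketch with made-up counts:

```python
import numpy as np

cm = np.array([[50.0, 10.0],   # [TN, FP]  (true class 0)
               [ 5.0, 35.0]])  # [FN, TP]  (true class 1)

row_norm = cm / cm.sum(axis=1, keepdims=True)  # [[TNR, FPR], [FNR, TPR]]
col_norm = cm / cm.sum(axis=0, keepdims=True)  # [[NPV, FDR], [FOR, PPV]]
```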
@@ -371,8 +371,8 @@ def CM(CM,Condition,n_classes,Save):
ax.grid(False)
if Save == True:
fig.savefig('./Dump/Fig/ConfusionCol.pdf',format='pdf',dpi=300,transparent=True)
#fig.savefig('./Dump/Fig/ConfusionCol.png',format='png',dpi=300,transparent=True)
fig.savefig('./Dump/Fig/ConfusionRow.pdf',format='pdf',dpi=300,transparent=True)
#fig.savefig('./Dump/Fig/ConfusionRow.png',format='png',dpi=300,transparent=True)
plt.show()
###
@@ -411,8 +411,8 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save):
ROC_inner = np.nanmean(tpr,axis=1); ROC_outer = np.nanmean(ROC_inner,axis=0)
PR_inner = np.nanmean(precision,axis=1); PR_outer = np.nanmean(PR_inner,axis=0)
axs[0].plot(curve_i,ROC_outer,color='maroon',label=r'AUC = '+str(np.around(Model_Mean[1],3))+'$\pm$'+str(np.around(Model_Std[1],5)),zorder=2)
axs[1].plot(curve_i,PR_outer,color='maroon',label=r'AUC = '+str(np.around(Model_Mean[0],3))+'$\pm$'+str(np.around(Model_Std[0],5)),zorder=2)
axs[0].plot(curve_i,ROC_outer,color='maroon',label='Total mean',zorder=2)
axs[1].plot(curve_i,PR_outer,color='maroon',label='Total mean',zorder=2)
P = C[1,0]; PP = C[1,1]; N = C[0,0]; PN = C[0,1]; POP = sum(C[:,0])
#P,PP,N,PN = C
@@ -423,12 +423,14 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save):
axs[0].set_xlabel(r'FPR = FP/N', fontsize=18); axs[1].set_xlabel(r'TPR = TP/P', fontsize=18)
axs[0].set_ylabel(r'TPR = TP/P', fontsize=18); axs[1].set_ylabel(r'PPV = TP/PP', fontsize=18)
axs[0].legend(loc=0, frameon=False, fontsize=16); axs[1].legend(loc=0, frameon=False, fontsize=16)
axs[0].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[1],3))+'$\pm$'+str(np.around(Model_Std[1],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[0].transAxes)
axs[0].set_title('ROC curve',fontsize=18)
axs[0].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[0].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[1].set_title('Precision-Recall curve',fontsize=18)
axs[1].yaxis.set_label_position('right'); axs[1].yaxis.set_ticks_position('right')
axs[1].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[0],3))+'$\pm$'+str(np.around(Model_Std[0],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[1].transAxes)
axs[1].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[1].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[0].set_xlim([0,1]);axs[0].set_ylim([0,1.03]);axs[1].set_xlim([0,1]);axs[1].set_ylim([0,1.03])
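The mean ROC and precision-recall curves are obtained by averaging fold-wise TPR (or precision) values on a common grid, first over the inner folds, then over the outer repetitions; the AUC labels have moved from the legends into text boxes. A sketch of the two-stage averaging with a hypothetical fold array:

```python
import numpy as np

N2, K2, n_grid = 5, 10, 100              # repetitions, folds, grid points (illustrative)
curve_i = np.linspace(0.0, 1.0, n_grid)  # common FPR (or recall) grid

rng = np.random.default_rng(2)
# tpr[r, k, :] holds fold k of repetition r, interpolated onto curve_i
tpr = np.clip(curve_i + rng.normal(0.0, 0.05, (N2, K2, n_grid)), 0.0, 1.0)

ROC_inner = np.nanmean(tpr, axis=1)        # average over the K2 inner folds
ROC_outer = np.nanmean(ROC_inner, axis=0)  # average over the N2 repetitions
auc = np.trapz(ROC_outer, curve_i)         # area under the mean curve
```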
@@ -440,7 +442,7 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save):
plt.show()
###
###
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Training,Save):
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Reference,Save):
fig, axs = plt.subplots(1,4,facecolor='w', edgecolor='k',sharey=True)
fig.set_size_inches([6.69, 8.86], forward=True)
@@ -531,14 +533,14 @@ def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Trainin
# Plot storm events from catalogs (papers)
cm = plt.cm.get_cmap('seismic')
KnownE = np.where(Training[:,2]==1)[0]
diff_t = (pd.to_datetime(Time[Training[KnownE,1]])-dt.datetime(1900,1,1,0,0,0))
y_t = np.zeros(len(Training[KnownE,1])); x_t = np.zeros(len(Training[KnownE,1]))
for i in range(len(Training[KnownE,1])):
KnownE = np.where(Reference[:,2]==1)[0]
diff_t = (pd.to_datetime(Time[Reference[KnownE,1]])-dt.datetime(1900,1,1,0,0,0))
y_t = np.zeros(len(Reference[KnownE,1])); x_t = np.zeros(len(Reference[KnownE,1]))
for i in range(len(Reference[KnownE,1])):
y_t[i] = np.divide(diff_t[i].total_seconds()/(60*60*24),SolarRot)
x_t[i] = np.mod(diff_t[i].total_seconds()/(60*60*24),SolarRot)
z = Training[KnownE,0]
z = Reference[KnownE,0]
sc = axs1.scatter(x_t, y_t, c=z, vmin=0.0, vmax=1.0, s=5, cmap=cm, zorder = 3)
cbar_axis = fig.add_axes([0.45, 0.032, 0.3, 0.01])
cbar = plt.colorbar(sc,cax=cbar_axis,use_gridspec=False, orientation ='horizontal',ticks=[0,0.5,1])
@@ -547,18 +549,18 @@ def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Trainin
cbar.ax.set_xticklabels(['0:C/SIR','ICME probability','1:ICME'],fontsize=11)
cbar.ax.tick_params(axis='x',which='both',direction='in')
#CIRs = np.where(Training[KnownE,0] == 0)[0]
#CMEs = np.where(Training[KnownE,1] == 1)[0]
#CIRs = np.where(Reference[KnownE,0] == 0)[0]
#CMEs = np.where(Reference[KnownE,1] == 1)[0]
# Plots class occurrences
nip = 0
for i in [1,0]:
zi = np.where(z==i)[0]
ni, bins = np.histogram(HTime[Training[KnownE[zi],1],4],np.arange(1900,2017,1))
ni, bins = np.histogram(HTime[Reference[KnownE[zi],1],4],np.arange(1900,2017,1))
axs2.barh(np.arange(1900,2016,1), ni, left = nip, height=0.8,color=cm(float(i)),zorder=5)
nip += ni
#Turner_CIR = np.where(np.logical_and(Training[:,2] == b'Turner',Target['type'] ==0))[0]
#Turner_CIR = np.where(np.logical_and(Reference[:,2] == b'Turner',Target['type'] ==0))[0]
#Jian_CIR = np.where(np.logical_and(Target['source'] == b'Jian',Target['type'] ==0))[0]
#Shen_CIR = np.where(np.logical_and(Target['source'] == b'Shen',Target['type'] ==0))[0]
#Turner_CME = np.where(np.logical_and(Target['source'] == b'Turner',Target['type'] ==1))[0]
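For the musical diagram, each catalogued event time is mapped to rotation coordinates: the days elapsed since 1900-01-01 divided by the rotation length give the row (y_t), and the remainder gives the phase within the rotation (x_t). A minimal sketch, assuming SolarRot is the 27-day Bartels rotation:

```python
import datetime as dt

SolarRot = 27.0                  # Bartels rotation length in days (assumption)
epoch = dt.datetime(1900, 1, 1)

def musical_coords(t):
    """Map a datetime to (phase within rotation, rotation number)."""
    days = (t - epoch).total_seconds() / (60 * 60 * 24)
    return days % SolarRot, days / SolarRot   # (x_t, y_t), cf. np.mod/np.divide above

x_t, y_t = musical_coords(dt.datetime(2003, 10, 29))  # e.g. the Halloween storm
```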
......
# ClassifyStorms - An automatic classifier for geomagnetic storm drivers based on machine learning techniques
# ClassifyStorms - an automatic classifier for geomagnetic storm drivers based on machine learning techniques
### Author
Leonie Pick, GFZ German Research Centre for Geosciences, leonie.pick@gfz-potsdam.de
@@ -32,7 +32,7 @@ executed by a supervised binary logistic regression model in the framework of py
### How to run ClassifyStorms?
Download the GitLab project 'ClassifyStorms' from http://gitext.gfz-potsdam.de/lpick/ClassifyStorms by clicking on the 'Download'
button (top right). Additionally, download 'Input.nc' from ftp://ftp.gfz-potsdam.de/home/mag/pick/ClassifyStorms and place it into
button (top right). Additionally, download 'Input.nc' from GFZ Data Services ( http://doi.org/10.5880/GFZ.2.3.2019.003 ) and place it into
the extracted directory 'ClassifyStorms-master'. Navigate to ClassifyStorms-master and start the jupyter server
(http://jupyter.org) by typing 'jupyter notebook' into the command line. This will open the jupyter 'Home' in your web
browser. Select 'ClassifyStorms.ipynb' from the list and run the notebook by clicking 'Run'.
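Input.nc is presumably a netCDF file, so its contents can be inspected before running the notebook; a minimal sketch assuming xarray is installed (the variable names inside the file are not listed here, so print the dataset to see them):

```python
import xarray as xr

ds = xr.open_dataset('Input.nc')  # the file from GFZ Data Services (see above)
print(ds)                         # shows the actual dimensions and variables
```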
@@ -46,6 +46,6 @@ browser. Select 'ClassifyStorms.ipynb' from the list and run the notebook by cli
project's master branch (destination).
### References
+ The original version of this software is a supplement to Pick et al., GRL, 2019, submitted.
+ The original version of this software is a supplement to Pick et al., Earth and Space Science, 2019.
+ Details on the HMC index are given in Pick et al., JGR Space Physics, 2019
( http://doi.org/10.1029/2018JA026185, with data published under http://doi.org/10.5880/GFZ.2.3.2018.006 ).