Commit eb9510fe authored by Leonie Pick's avatar Leonie Pick

Update prior to submission.

parent c92b7cc3
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -7,6 +7,7 @@ import pandas as pd
import datetime as dt
import itertools
import time as time
import joblib
# IPython
from IPython.display import display
# Matplotlib
......
......@@ -132,12 +132,12 @@ def Search_TargetEvents(HMC, HMC11y, HMC5d, dHMC, HTime, DTime, grid, Save):
return IndexMin2
###
###
def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Time, Date, YearStart, Save):
def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Reference, Time, Date, YearStart, Save):
YearsIndex = np.where(Time[:,0] >= YearStart)[0]
TrClass = Training[:,0]
TrTimeIndex = Training[:,1]
TrFound = Training[:,2]
TrClass = Reference[:,0]
TrTimeIndex = Reference[:,1]
TrFound = Reference[:,2]
CIRs = np.where(np.logical_or(TrClass == 0, TrClass == 2))[0]
CMEs = np.where(TrClass==1)[0]
......@@ -179,19 +179,19 @@ def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Ti
TrFound[CIRIndices] = 1
TrFound[CMEIndices] = 1
Training[:,2] = TrFound
Reference[:,2] = TrFound
#print(grid[IndexWin,:])
#if Save == True:
#np.save('./Dump/Events',StormsWin)
###
TargetResults = {'No. targets':len(StormsWin),'Fraction [%]':len(StormsWin)*100/len(HMC[YearsIndex])}
TargetResults = {'No. target events':len(StormsWin),'Fraction [%]':len(StormsWin)*100/len(HMC[YearsIndex])}
TrainingResults = [[len(CIRs), len(CMEs), len(CIRs)+len(CMEs)],[len(Found_CIRs),len(Found_CMEs),len(Found_CIRs)+len(Found_CMEs)]]
SelectionParams = {'HMC percentile':grid[IndexWin,0],'Scaling power':grid[IndexWin,1], 'Separation [h]':grid[IndexWin,2], 'Min. HMC drop [nT]':grid[IndexWin,3]}
SelectionParams = {r'$P_n [nT]$':grid[:,0],r'$p_{sc}$':grid[:,1], r'$\Delta t$ [h]':grid[:,2], r' $Hs$ [nT]':-grid[:,3]}
display(pd.DataFrame(data=SelectionParams,index=['Selection parameters']))
display(pd.DataFrame(data=TargetResults,index=['Selection result']))
display(pd.DataFrame(data=TrainingResults,columns=['No. CIRs','No. CMEs', 'Total'], index=['Training set', 'in Target set']))
display(pd.DataFrame(data=TrainingResults,columns=['No. CIRs','No. CMEs', 'Total'], index=['Reference set', 'Training set']))
###
pl.IndexDist(Time,YearsIndex,StormsWin,Kp_all,KpHours_all,HMC,Save)
......@@ -324,7 +324,7 @@ def Get_F11(i,TrTimeIndex,TrClass):
return F11
###
###
def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, Save):
def Get_Features(Time, Storms, Reference, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, Save):
PeakTimes = Storms; PeakIndices = np.arange(0,len(Storms),1)
......@@ -342,8 +342,8 @@ def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, S
dAsyDD = np.gradient(ASY[:,0],1,edge_order=1)
dHMC11y = np.gradient(HMC11y,1,edge_order=1)
TrTimeIndex = Training[:,1]#Training.sel(Properties='TimeIndex')
TrClass = Training[:,0]#Training.sel(Properties='Class')
TrTimeIndex = Reference[:,1]#Reference.sel(Properties='TimeIndex')
TrClass = Reference[:,0]#Reference.sel(Properties='Class')
excludeEvents = []
count1=0; count2=0
......@@ -406,11 +406,11 @@ def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, S
return PeakIndices, FeatureMatrix
###
###
def Get_Diagnostics(FeatureMatrix,Storms,Training,Save):
def Get_Diagnostics(FeatureMatrix,Storms,Reference,Save):
TrClass = Training[:,0]
TrTimeIndex = Training[:,1]
TrKnown = Training[:,2]
TrClass = Reference[:,0]
TrTimeIndex = Reference[:,1]
TrKnown = Reference[:,2]
n_features = FeatureMatrix.shape[1]
n_classes = len(np.unique(TrClass))
......@@ -456,7 +456,7 @@ def Get_Diagnostics(FeatureMatrix,Storms,Training,Save):
k += 1
###
display(pd.DataFrame(data=FeatureResults,index=np.linspace(1,12,12,dtype=int),columns=['Q2 CIRs','Q2 CMEs','Q2 Difference','IQR Overlap', 'Q2 Difference- IQR Overlap']))
display(pd.DataFrame(data=FeatureResults,index=np.linspace(1,12,12,dtype=int),columns=['Q2 CIRs','Q2 CMEs','Q2 Difference','IQR Overlap', 'Q2 Difference - IQR Overlap']))
###
pl.Diagnostics(n_features, n_classes, NData, Save)
......
......@@ -20,8 +20,8 @@ def Selection(Time,Date,HMC,Index_thres1,StormIndices,IndexMin,IndexMin1,IndexMi
end = np.where(Time[:,4]==ends[i])[0][0]
ax[i].plot(Time[start:end,4],HMC[start:end],color='gray')
ax[i].plot(Time[start:end,4],Index_thres1[start:end],linestyle='--',color='midnightblue',label=r'HMC$_{\mathrm{thres}}\in$ ['+str(np.around(max(Index_thres1[start:end]),2))+' nT ,'+str(np.around(min(Index_thres1[start:end]),2))+' nT]')
ax[i].scatter(Time[StormIndices,4],HMC[StormIndices],color='midnightblue',s=5,zorder=3)
ax[i].plot(Time[start:end,4],Index_thres1[start:end],linestyle='--',color='midnightblue',label=r'$Hl_t\in$ ['+str(np.around(max(Index_thres1[start:end]),2))+' nT, '+str(np.around(min(Index_thres1[start:end]),2))+' nT]')
ax[i].scatter(Time[StormIndices,4],HMC[StormIndices],color='midnightblue',s=5,zorder=3)
ax[i].set_xlim([Time[start,4],Time[end,4]])
ax[i].set_ylabel('HMC [nT]')
......@@ -86,8 +86,8 @@ def IndexDist(Time,YearsIndex,Storms,Kp_all,KpHours_all,HMC,Save):
HMC_step = 7.78
HMC_range = np.arange(10,480+HMC_step,HMC_step)
Index_range = Kp_range #HMC_range
step = Kp_step #HMC_step
Index_range = HMC_range #Kp_range
step = HMC_step #Kp_step
years = np.arange(1932,2016,1)
Percentages = np.zeros((len(years),len(Index_range)))
......@@ -121,11 +121,11 @@ def IndexDist(Time,YearsIndex,Storms,Kp_all,KpHours_all,HMC,Save):
HMCValues = -HMC[Storms[StormsBlock]]
KpValuesAll.extend(KpValues.tolist())
Hist[l,:], bin_edges = np.histogram(KpValues, bins = Index_range)
#KpValuesAll.extend(KpValues.tolist())
#Hist[l,:], bin_edges = np.histogram(KpValues, bins = Index_range)
#KpValuesAll.extend(HMCValues.tolist())
#Hist[l,:], bin_edges = np.histogram(HMCValues, bins = Index_range)
KpValuesAll.extend(HMCValues.tolist())
Hist[l,:], bin_edges = np.histogram(HMCValues, bins = Index_range)
Hist[l,:] *= 100/len(Storms1932)
......@@ -171,17 +171,17 @@ def IndexDist(Time,YearsIndex,Storms,Kp_all,KpHours_all,HMC,Save):
ax2.legend(loc=4,ncol=1,frameon=False,fontsize=10)
ax2.set_ylabel('Occurrence [%]')
ax2.set_xticklabels(np.arange(0,10,1))
ax2.set_xlabel('Kp')
ax2.set_ylim([0,10.5])
#ax2.set_xticklabels(np.arange(0,10,1))
#ax2.set_xlabel('Kp')
#ax2.set_ylim([0,10])
#ax2.set_xticklabels(np.around(Index_range[::3]/100,1))
#ax2.set_xlabel('-HMC/100 [nT]')
#ax2.set_ylim([0,45])
ax2.set_xticklabels(np.around(Index_range[::3]/100,1))
ax2.set_xlabel('-HMC/100 [nT]')
ax2.set_ylim([0,20])
if Save == True:
fig.savefig('./Dump/Fig/KpDist.pdf',format='pdf',dpi=200,transparent=True)
#fig.savefig('./Dump/Fig/KpDist.png',format='png',dpi=200,transparent=True)
fig.savefig('./Dump/Fig/HMCDist.pdf',format='pdf',dpi=200,transparent=True)
#fig.savefig('./Dump/Fig/HMCDist.png',format='png',dpi=200,transparent=True)
plt.show()
###
###
......@@ -194,7 +194,7 @@ def Diagnostics(n_features,n_classes,NData, Save):
fig.subplots_adjust(bottom=0.08,top=0.95,left=0.05,right=0.98,wspace=0.2,hspace=0.2)
ax = ax.ravel()
fNames = [r'$\Delta$HMC', r'dHMC/dt,r', r'$\Delta$dHMC/dt', r'r$\left(\hat{Z}_6,\hat{Z}_{18}\right)$', r'Range r$\left(\hat{Z}_{6\mathrm{ lag}}\mathrm{, }\hat{Z}_{18}\right)$', r'MAD$\left(d\hat{Z}_6/dt\right)$', r'arg max $Z_{\mathrm{MLT}}$', r'max$\left(ASY_{max}\right)$', 'FWHM $ASY_{\mathrm{DD}}$', 'Solar cycle phase', 'Recurrence', 'Ideal (synthetic)']
fNames = [r'$k$=1',r'$k$=9',r'$k$=11',r'$k$=2',r'$k$=10',r'$k$=3',r'$k$=6',r'$k$=7',r'$k$=8',r'$k$=4',r'$k$=5',r'$k$=12']
All = sum(len(NData[j]) for j in range(n_classes))
if n_classes == 2:
......@@ -224,7 +224,7 @@ def Diagnostics(n_features,n_classes,NData, Save):
ax[k].set_xlabel('Standardized feature value', fontsize=18)
#if i == n_features-1: ax[i].text(0.26,1.02,'Synthetic',fontsize=8,transform=ax[i].transAxes)
#else:
ax[k].text(0,1.02,str(k+1)+': '+fNames[i],fontsize=18,transform=ax[k].transAxes)
ax[k].text(0,1.02,fNames[i],fontsize=18,transform=ax[k].transAxes)
ax[k].set_xlim([0,1]); ax[k].set_xticks([0,0.25,0.5,0.75,1.0])
ax[k].tick_params(axis = 'y', which='both',direction = 'in',labelsize=16)
......@@ -269,7 +269,7 @@ def Ranking(Scores, Std, Data, Ranking, nfeaturesFinal, estimators, scoring_name
axu.tick_params(axis ='x',which='both',direction='inout',labelsize=16)
if est == 0:
ax[est].set_xlabel('Number of features',fontsize=18)
ax[est].set_xlabel(r'Number of features ($N_f$)',fontsize=18)
ax[est].legend(loc=4,ncol=1,frameon=False,fontsize=18,scatterpoints=1)
ax[est].set_ylabel(scoring_name,fontsize=18)
axu.set_xlabel(r'Feature IDs ( $\rightarrow$ cumulative)',fontsize=18)
......@@ -331,8 +331,8 @@ def ModelCounts(nModels,SumCounts,CV2_means,N1,K1,N2,K2,Param_Comb,BestID,scorin
def CM(CM,Condition,n_classes,Save):
CM = np.reshape(CM,(n_classes,n_classes))
#CM_names = ['TNR','FPR','FNR','TPR']
CM_names = ['NPV','FDR','FOR','PPV']
CM_names = ['TNR','FPR','FNR','TPR']
#CM_names = ['NPV','FDR','FOR','PPV']
CM_names = np.asarray(CM_names).reshape((2,2))
classes = range(n_classes)
......@@ -343,8 +343,8 @@ def CM(CM,Condition,n_classes,Save):
#Normalization
for i in range(n_classes):
#CM[i,:] /= Condition[i,0]
CM[:,i] /= Condition[i,1]
CM[i,:] /= Condition[i,0] #row-normalization
#CM[:,i] /= Condition[i,1] #column-normalization
class_names.append(np.str(i))
fig, ax = plt.subplots(1,1,facecolor='w',edgecolor='k',sharey=True)
......@@ -371,8 +371,8 @@ def CM(CM,Condition,n_classes,Save):
ax.grid(False)
if Save == True:
fig.savefig('./Dump/Fig/ConfusionCol.pdf',format='pdf',dpi=300,transparent=True)
#fig.savefig('./Dump/Fig/ConfusionCol.png',format='png',dpi=300,transparent=True)
fig.savefig('./Dump/Fig/ConfusionRow.pdf',format='pdf',dpi=300,transparent=True)
#fig.savefig('./Dump/Fig/ConfusionRow.png',format='png',dpi=300,transparent=True)
plt.show()
###
......@@ -411,8 +411,8 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save):
ROC_inner = np.nanmean(tpr,axis=1); ROC_outer = np.nanmean(ROC_inner,axis=0)
PR_inner = np.nanmean(precision,axis=1); PR_outer = np.nanmean(PR_inner,axis=0)
axs[0].plot(curve_i,ROC_outer,color='maroon',label=r'AUC = '+str(np.around(Model_Mean[1],3))+'$\pm$'+str(np.around(Model_Std[1],5)),zorder=2)
axs[1].plot(curve_i,PR_outer,color='maroon',label=r'AUC = '+str(np.around(Model_Mean[0],3))+'$\pm$'+str(np.around(Model_Std[0],5)),zorder=2)
axs[0].plot(curve_i,ROC_outer,color='maroon',label='Total mean',zorder=2)
axs[1].plot(curve_i,PR_outer,color='maroon',label='Total mean',zorder=2)
P = C[1,0]; PP = C[1,1]; N = C[0,0]; PN = C[0,1]; POP = sum(C[:,0])
#P,PP,N,PN = C
......@@ -423,12 +423,14 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save):
axs[0].set_xlabel(r'FPR = FP/N', fontsize=18); axs[1].set_xlabel(r'TPR = TP/P', fontsize=18)
axs[0].set_ylabel(r'TPR = TP/P', fontsize=18); axs[1].set_ylabel(r'PPV = TP/PP', fontsize=18)
axs[0].legend(loc=0, frameon=False, fontsize=16); axs[1].legend(loc=0, frameon=False, fontsize=16)
axs[0].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[1],3))+'$\pm$'+str(np.around(Model_Std[1],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[0].transAxes)
axs[0].set_title('ROC curve',fontsize=18)
axs[0].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[0].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[1].set_title('Precision-Recall curve',fontsize=18)
axs[1].yaxis.set_label_position('right'); axs[1].yaxis.set_ticks_position('right')
axs[1].text(0.275,0.5,r'AUC = '+str(np.around(Model_Mean[0],3))+'$\pm$'+str(np.around(Model_Std[0],5)),bbox=dict(boxstyle='square',ec=(0.,0.,0.),fc=(1.,1.,1.)),fontsize=16,transform=axs[1].transAxes)
axs[1].tick_params(axis ='x',which='both',direction='inout',labelsize=16)
axs[1].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[0].set_xlim([0,1]);axs[0].set_ylim([0,1.03]);axs[1].set_xlim([0,1]);axs[1].set_ylim([0,1.03])
......@@ -440,7 +442,7 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save):
plt.show()
###
###
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Training,Save):
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Reference,Save):
fig, axs = plt.subplots(1,4,facecolor='w', edgecolor='k',sharey=True)
fig.set_size_inches([6.69, 8.86], forward=True)
......@@ -531,14 +533,14 @@ def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Trainin
# Plot storm events from catalogs (papers)
cm = plt.cm.get_cmap('seismic')
KnownE = np.where(Training[:,2]==1)[0]
diff_t = (pd.to_datetime(Time[Training[KnownE,1]])-dt.datetime(1900,1,1,0,0,0))
y_t = np.zeros(len(Training[KnownE,1])); x_t = np.zeros(len(Training[KnownE,1]))
for i in range(len(Training[KnownE,1])):
KnownE = np.where(Reference[:,2]==1)[0]
diff_t = (pd.to_datetime(Time[Reference[KnownE,1]])-dt.datetime(1900,1,1,0,0,0))
y_t = np.zeros(len(Reference[KnownE,1])); x_t = np.zeros(len(Reference[KnownE,1]))
for i in range(len(Reference[KnownE,1])):
y_t[i] = np.divide(diff_t[i].total_seconds()/(60*60*24),SolarRot)
x_t[i] = np.mod(diff_t[i].total_seconds()/(60*60*24),SolarRot)
z = Training[KnownE,0]
z = Reference[KnownE,0]
sc = axs1.scatter(x_t, y_t, c=z, vmin=0.0, vmax=1.0, s=5, cmap=cm, zorder = 3)
cbar_axis = fig.add_axes([0.45, 0.032, 0.3, 0.01])
cbar = plt.colorbar(sc,cax=cbar_axis,use_gridspec=False, orientation ='horizontal',ticks=[0,0.5,1])
......@@ -547,18 +549,18 @@ def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Trainin
cbar.ax.set_xticklabels(['0:C/SIR','ICME probability','1:ICME'],fontsize=11)
cbar.ax.tick_params(axis='x',which='both',direction='in')
#CIRs = np.where(Training[KnownE,0] == 0)[0]
#CMEs = np.where(Training[KnownE,1] == 1)[0]
#CIRs = np.where(Reference[KnownE,0] == 0)[0]
#CMEs = np.where(Reference[KnownE,1] == 1)[0]
# Plots class occurrences
nip = 0
for i in [1,0]:
zi = np.where(z==i)[0]
ni, bins = np.histogram(HTime[Training[KnownE[zi],1],4],np.arange(1900,2017,1))
ni, bins = np.histogram(HTime[Reference[KnownE[zi],1],4],np.arange(1900,2017,1))
axs2.barh(np.arange(1900,2016,1), ni, left = nip, height=0.8,color=cm(float(i)),zorder=5)
nip += ni
#Turner_CIR = np.where(np.logical_and(Training[:,2] == b'Turner',Target['type'] ==0))[0]
#Turner_CIR = np.where(np.logical_and(Reference[:,2] == b'Turner',Target['type'] ==0))[0]
#Jian_CIR = np.where(np.logical_and(Target['source'] == b'Jian',Target['type'] ==0))[0]
#Shen_CIR = np.where(np.logical_and(Target['source'] == b'Shen',Target['type'] ==0))[0]
#Turner_CME = np.where(np.logical_and(Target['source'] == b'Turner',Target['type'] ==1))[0]
......
# ClassifyStorms - An automatic classifier for geomagnetic storm drivers based on machine learning techniques
# ClassifyStorms - an automatic classifier for geomagnetic storm drivers based on machine learning techniques
### Author
Leonie Pick, GFZ German Research Centre for Geosciences, leonie.pick@gfz-potsdam.de
......@@ -32,7 +32,7 @@ executed by a supervised binary logistic regression model in the framework of py
### How to run ClassifyStorms?
Download the GitLab project 'ClassifyStorms' from http://gitext.gfz-potsdam.de/lpick/ClassifyStorms by clicking on the 'Download'
button (top right). Additionally, download 'Input.nc' from ftp://ftp.gfz-potsdam.de/home/mag/pick/ClassifyStorms and place it into
button (top right). Additionally, download 'Input.nc' from GFZ Data Services ( http://doi.org/10.5880/GFZ.2.3.2019.003 ) and place it into
the extracted directory 'ClassifyStorms-master'. Navigate to ClassifyStorms-master and start the jupyter server
(http://jupyter.org) by typing 'jupyter notebook' into the command line. This will open the jupyter 'Home' in your web
browser. Select 'ClassifyStorms.ipynb' from the list and run the notebook by clicking 'Run'.
......@@ -46,6 +46,6 @@ browser. Select 'ClassifyStorms.ipynb' from the list and run the notebook by cli
project's master branch (destination).
### References
+ The original version of this software is a supplement to Pick et al., GRL, 2019, submitted.
+ The original version of this software is a supplement to Pick et al., Earth and Space Science, 2019.
+ Details on the HMC index are given in Pick et al., JGR Space Physics, 2019
( http://doi.org/10.1029/2018JA026185, with data published under http://doi.org/10.5880/GFZ.2.3.2018.006 ).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment