Commit 466dc28d authored by Leonie Pick's avatar Leonie Pick

Updates to software for consistency with article for ESS

parent 514d6d50
This diff is collapsed.
......@@ -7,6 +7,7 @@ import pandas as pd
import datetime as dt
import itertools
import time as time
import joblib
# IPython
from IPython.display import display
# Matplotlib
......
......@@ -135,12 +135,12 @@ def Search_TargetEvents(HMC, HMC11y, HMC5d, dHMC, HTime, DTime, grid, Plot, Save
return IndexMin2
###
###
def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Time, Date, YearStart, Plot, Save):
def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Reference, Time, Date, YearStart, Plot, Save):
YearsIndex = np.where(Time[:,0] >= YearStart)[0]
TrClass = Training[:,0]
TrTimeIndex = Training[:,1]
#TrFound = Training[:,2]
TrClass = Reference[:,0]
TrTimeIndex = Reference[:,1]
#TrFound = Reference[:,2]
CIRs = np.where(np.logical_or(TrClass == 0, TrClass == 2))[0]
CMEs = np.where(TrClass==1)[0]
......@@ -160,7 +160,7 @@ def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Ti
#thresHMC = np.arange(0,20,2)
grid = np.stack(np.meshgrid(percentile,power,mini_range,thresHMC),-1).reshape(-1,4)
TrFound = np.zeros((len(grid),len(Training)))
TrFound = np.zeros((len(grid),len(Reference)))
StormsWin = dict()
FoundStorms = dict()
......@@ -196,19 +196,19 @@ def Get_TargetEvents(HMC, HMC11y, HMC5d, dHMC, Kp_all, KpHours_all, Training, Ti
TrFound[i,CIRIndices] = 1
TrFound[i,CMEIndices] = 1
#Training[:,2] = TrFound
#Reference[:,2] = TrFound
##print(grid[IndexWin,:])
IndexWin = 0
SelectStr = str(int(IndexWin))
Training[:,2] = TrFound[IndexWin,:]
Reference[:,2] = TrFound[IndexWin,:]
###
TargetResults = {'No. targets': len(StormsWin[SelectStr]),'Fraction [%]':len(StormsWin[SelectStr])*100/len(HMC[YearsIndex])}
TargetResults = {'No. target events': len(StormsWin[SelectStr]),'Fraction [%]':len(StormsWin[SelectStr])*100/len(HMC[YearsIndex])}
TrainingResults = [[len(CIRs), len(CMEs), len(CIRs)+len(CMEs)],[len(FoundStorms['CIRs_'+SelectStr]),len(FoundStorms['CMEs_'+SelectStr]),len(FoundStorms['CIRs_'+SelectStr])+len(FoundStorms['CMEs_'+SelectStr])]]
SelectionParams = {'HMC percentile':grid[:,0],'Scaling power':grid[:,1], 'Separation [h]':grid[:,2], 'Min. HMC drop [nT]':grid[:,3]}
SelectionParams = {r'$P_n [nT]$':grid[:,0],r'$p_{sc}$':grid[:,1], r'$\Delta t$ [h]':grid[:,2], r' $Hs$ [nT]':-grid[:,3]}
display(pd.DataFrame(data=SelectionParams))
display(pd.DataFrame(data=TargetResults,index=['Selection']))
display(pd.DataFrame(data=TrainingResults,columns=['No. CIRs','No. CMEs', 'Total'], index=['Training set', 'in Target set']))
display(pd.DataFrame(data=TrainingResults,columns=['No. CIRs','No. CMEs', 'Total'], index=['Reference set', 'Training set']))
###
if Plot == True:
......@@ -342,7 +342,7 @@ def Get_F11(i,TrTimeIndex,TrClass):
return F11
###
###
def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, Save):
def Get_Features(Time, Storms, Reference, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, Save):
PeakTimes = Storms; PeakIndices = np.arange(0,len(Storms),1)
......@@ -360,8 +360,8 @@ def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, S
dAsyDD = np.gradient(ASY[:,0],1,edge_order=1)
dHMC11y = np.gradient(HMC11y,1,edge_order=1)
TrTimeIndex = Training[:,1]#Training.sel(Properties='TimeIndex')
TrClass = Training[:,0]#Training.sel(Properties='Class')
TrTimeIndex = Reference[:,1]#Reference.sel(Properties='TimeIndex')
TrClass = Reference[:,0]#Reference.sel(Properties='Class')
excludeEvents = []
count1=0; count2=0
......@@ -419,16 +419,16 @@ def Get_Features(Time, Storms, Training, HMC, HMC11y, HMC1y, dHMC, B_MLT, ASY, S
print('Caution!')
if Save == True:
np.savez('./Dump/Out/Features',Features=FeatureMatrix,FeatureIndices=PeakIndices)
np.savez('./Dump/Out/development/Features',Features=FeatureMatrix,FeatureIndices=PeakIndices)
return PeakIndices, FeatureMatrix
###
###
def Get_Diagnostics(FeatureMatrix,Storms,Training,Save):
def Get_Diagnostics(FeatureMatrix,Storms,Reference,Save):
TrClass = Training[:,0]
TrTimeIndex = Training[:,1]
TrKnown = Training[:,2]
TrClass = Reference[:,0]
TrTimeIndex = Reference[:,1]
TrKnown = Reference[:,2]
n_features = FeatureMatrix.shape[1]
n_classes = len(np.unique(TrClass))
......@@ -474,7 +474,7 @@ def Get_Diagnostics(FeatureMatrix,Storms,Training,Save):
k += 1
###
display(pd.DataFrame(data=FeatureResults,index=np.linspace(1,12,12,dtype=int),columns=['Q2 CIRs','Q2 CMEs','Q2 Difference','IQR Overlap', 'Q2 Difference- IQR Overlap']))
display(pd.DataFrame(data=FeatureResults,index=np.linspace(1,12,12,dtype=int),columns=['Q2 CIRs','Q2 CMEs','Q2 Difference','IQR Overlap', 'Q2 Difference - IQR Overlap']))
###
pl.Diagnostics(n_features, n_classes, NData, Save)
......
......@@ -280,7 +280,7 @@ def Ranking(Scores, Std, Data, Ranking, nfeaturesFinal, estimators, scoring_name
axu.tick_params(axis ='x',which='both',direction='inout',labelsize=16)
if est == 0:
ax[est].set_xlabel('Number of features',fontsize=18)
ax[est].set_xlabel(r'Number of features ($N_f$)',fontsize=18)
ax[est].legend(loc=4,ncol=1,frameon=False,fontsize=18,scatterpoints=1)
ax[est].set_ylabel(scoring_name,fontsize=18)
axu.set_xlabel(r'Feature IDs ( $\rightarrow$ cumulative)',fontsize=18)
......@@ -342,9 +342,9 @@ def ModelCounts(nModels,SumCounts,CV2_means,N1,K1,N2,K2,Param_Comb,BestID,scorin
def CM(CM,Condition,n_classes,Save,SaveName):
CM = np.reshape(CM,(n_classes,n_classes))
#CM_names = ['TNR','FPR','FNR','TPR']
CM_names = ['TNR','FPR','FNR','TPR']
#CM_names = ['NPV','FDR','FOR','PPV']
CM_names = ['TN','FP,','FN','TP']
#CM_names = ['TN','FP','FN','TP']
CM_names = np.asarray(CM_names).reshape((2,2))
classes = range(n_classes)
......@@ -353,10 +353,10 @@ def CM(CM,Condition,n_classes,Save,SaveName):
#print(Condition)
print(CM)
##Normalization
#for i in range(n_classes):
##CM[i,:] /= Condition[i,0]
#CM[:,i] /= Condition[i,1]
#Normalization
for i in range(n_classes):
CM[i,:] /= Condition[i,0] #row-normalization
#CM[:,i] /= Condition[i,1] #column-normalization
#class_names.append(np.str(i))
fig, ax = plt.subplots(1,1,facecolor='w',edgecolor='k',sharey=True)
......@@ -447,14 +447,14 @@ def Curves(N2,K2,Curves,curve_i,Model_Mean,Model_Std,C,Save,SaveName):
axs[1].tick_params(axis ='y',which='both',direction='inout',labelsize=16)
axs[0].set_xlim([0,1]);axs[0].set_ylim([0,1.03]);axs[1].set_xlim([0,1]);axs[1].set_ylim([0,1.03])
#if Save == True:
fig.savefig('./Dump/Fig/development/Curves_'+SaveName+'.pdf',format='pdf',dpi=300,transparent=True)
fig.savefig('./Dump/Fig/development/Curves_'+SaveName+'.png',format='png',dpi=300,transparent=True)
if Save == True:
fig.savefig('./Dump/Fig/development/Curves_'+SaveName+'.pdf',format='pdf',dpi=300,transparent=True)
fig.savefig('./Dump/Fig/development/Curves_'+SaveName+'.png',format='png',dpi=300,transparent=True)
plt.show()
###
###
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Training,Save):
def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Reference,Save):
fig, axs = plt.subplots(1,4,facecolor='w', edgecolor='k',sharey=True)
fig.set_size_inches([6.69, 8.86], forward=True)
......@@ -545,14 +545,14 @@ def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Trainin
# Plot storm events from catalogs (papers)
cm = plt.cm.get_cmap('seismic')
KnownE = np.where(Training[:,2]==1)[0]
diff_t = (pd.to_datetime(Time[Training[KnownE,1]])-dt.datetime(1900,1,1,0,0,0))
y_t = np.zeros(len(Training[KnownE,1])); x_t = np.zeros(len(Training[KnownE,1]))
for i in range(len(Training[KnownE,1])):
KnownE = np.where(Reference[:,2]==1)[0]
diff_t = (pd.to_datetime(Time[Reference[KnownE,1]])-dt.datetime(1900,1,1,0,0,0))
y_t = np.zeros(len(Reference[KnownE,1])); x_t = np.zeros(len(Reference[KnownE,1]))
for i in range(len(Reference[KnownE,1])):
y_t[i] = np.divide(diff_t[i].total_seconds()/(60*60*24),SolarRot)
x_t[i] = np.mod(diff_t[i].total_seconds()/(60*60*24),SolarRot)
z = Training[KnownE,0]
z = Reference[KnownE,0]
sc = axs1.scatter(x_t, y_t, c=z, vmin=0.0, vmax=1.0, s=5, cmap=cm, zorder = 3)
cbar_axis = fig.add_axes([0.45, 0.032, 0.3, 0.01])
cbar = plt.colorbar(sc,cax=cbar_axis,use_gridspec=False, orientation ='horizontal',ticks=[0,0.5,1])
......@@ -561,18 +561,18 @@ def Musical(Time,HTime,Storms,Kp_all,KpHours_all,SN,SNYears,HMC,HMC_filt,Trainin
cbar.ax.set_xticklabels(['0:C/SIR','ICME probability','1:ICME'],fontsize=11)
cbar.ax.tick_params(axis='x',which='both',direction='in')
#CIRs = np.where(Training[KnownE,0] == 0)[0]
#CMEs = np.where(Training[KnownE,1] == 1)[0]
#CIRs = np.where(Reference[KnownE,0] == 0)[0]
#CMEs = np.where(Reference[KnownE,1] == 1)[0]
# Plots class occurrences
nip = 0
for i in [1,0]:
zi = np.where(z==i)[0]
ni, bins = np.histogram(HTime[Training[KnownE[zi],1],4],np.arange(1900,2017,1))
ni, bins = np.histogram(HTime[Reference[KnownE[zi],1],4],np.arange(1900,2017,1))
axs2.barh(np.arange(1900,2016,1), ni, left = nip, height=0.8,color=cm(float(i)),zorder=5)
nip += ni
#Turner_CIR = np.where(np.logical_and(Training[:,2] == b'Turner',Target['type'] ==0))[0]
#Turner_CIR = np.where(np.logical_and(Reference[:,2] == b'Turner',Target['type'] ==0))[0]
#Jian_CIR = np.where(np.logical_and(Target['source'] == b'Jian',Target['type'] ==0))[0]
#Shen_CIR = np.where(np.logical_and(Target['source'] == b'Shen',Target['type'] ==0))[0]
#Turner_CME = np.where(np.logical_and(Target['source'] == b'Turner',Target['type'] ==1))[0]
......
# ClassifyStorms - An automatic classifier for geomagnetic storm drivers based on machine learning techniques
# ClassifyStorms - an automatic classifier for geomagnetic storm drivers based on machine learning techniques
### Author
Leonie Pick, GFZ German Research Centre for Geosciences, leonie.pick@gfz-potsdam.de
......@@ -32,7 +32,7 @@ executed by a supervised binary logistic regression model in the framework of py
### How to run ClassifyStorms?
Download the GitLab project 'ClassifyStorms' from http://gitext.gfz-potsdam.de/lpick/ClassifyStorms by clicking on the 'Download'
button (top right). Additionally, download 'Input.nc' from ftp://ftp.gfz-potsdam.de/home/mag/pick/ClassifyStorms and place it into
button (top right). Additionally, download 'Input.nc' from GFZ Data Services ( http://doi.org/10.5880/GFZ.2.3.2019.003 ) and place it into
the extracted directory 'ClassifyStorms-master'. Navigate to ClassifyStorms-master and start the jupyter server
(http://jupyter.org) by typing 'jupyter notebook' into the command line. This will open the jupyter 'Home' in your web
browser. Select 'ClassifyStorms.ipynb' from the list and run the notebook by clicking 'Run'.
......@@ -46,6 +46,6 @@ browser. Select 'ClassifyStorms.ipynb' from the list and run the notebook by cli
project's master branch (destination).
### References
+ The original version of this software is a supplement to Pick et al., GRL, 2019, submitted.
+ The original version of this software is a supplement to Pick et al., Earth and Space Science, 2019.
+ Details on the HMC index are given in Pick et al., JGR Space Physics, 2019
( http://doi.org/10.1029/2018JA026185, with data published under http://doi.org/10.5880/GFZ.2.3.2018.006 ).
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment