Commit 0a011cc4 authored by Pablo Iturrieta Rebolledo

clean pgs code

Defined the final results figure.
Modified get_forecast_figures to work with the new data structure and the new ModelObject structure.

# Pending: modify the T-test plot to mark t-test significance along with W-test significance
parent 82b27067
@@ -25,10 +25,9 @@ def pgs(Forecasts, Catalog, ref='avg', bin='spatial_magnitude', save_obj=False,
    catalog_counts = Catalog.spatial_counts().reshape(-1,1)
    probabilities_forecasts = []
    for f_i in Forecasts:
-        data = np.abs(f_i[0].data)
+        data = np.abs(f_i.data)
        if bin == 'spatial':
-            data = np.sum(data, axis=1).reshape(-1,1 )
-            print(catalog_counts.shape, data.shape)
+            data = np.sum(data, axis=1).reshape(-1, 1)
        prob_i = scipy.stats.poisson.pmf(catalog_counts, data)
        probabilities_forecasts.append(prob_i)
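The loop above scores each forecast by the Poisson probability of the observed counts in every bin. A minimal, self-contained sketch of that step, with synthetic stand-ins for the forecast rates and the catalog counts (all names here are illustrative, not from the repository):

import numpy as np
import scipy.stats

rng = np.random.default_rng(42)
rates = rng.gamma(0.5, size=(100, 4))      # hypothetical forecast rates, (space bins, magnitude bins)
counts = rng.poisson(0.1, size=(100, 1))   # hypothetical observed counts per spatial bin

# Poisson probability of the observed counts under the forecast rates,
# evaluated independently per bin; counts broadcasts across the magnitude axis.
probs = scipy.stats.poisson.pmf(counts, rates)
print(probs.shape)  # (100, 4)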
@@ -37,6 +36,7 @@ def pgs(Forecasts, Catalog, ref='avg', bin='spatial_magnitude', save_obj=False,
        payout = np.zeros((k, catalog_counts.shape[0], catalog_counts.shape[1] ))
        for i in range(payout.shape[0]):
            payout[i] = (-1 + probabilities_forecasts[i]/total_bet)
    elif isinstance(ref, int): ## Choose model to do head-to-head betting
        ref = probabilities_forecasts[ref]
        payout = np.zeros((k, catalog_counts.shape[0], catalog_counts.shape[1]))
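The payout step implements the parimutuel bet: each of the k models stakes one unit per bin, and a model whose bin probability beats the pooled odds collects a positive net return there. A hedged sketch, assuming total_bet is the average of the models' bin probabilities (its definition sits outside the hunks shown here):

import numpy as np

k = 3
rng = np.random.default_rng(1)
probs = rng.uniform(0.01, 1.0, size=(k, 100, 1))  # hypothetical per-model bin probabilities
total_bet = probs.mean(axis=0)                    # pooled 'house' odds

# net return per unit stake: -1 (the stake) plus the fair-odds reward
payout = -1 + probs / total_bet
print(payout.sum(axis=(1, 2)))                    # net gambling score per model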
@@ -50,11 +50,10 @@ def pgs(Forecasts, Catalog, ref='avg', bin='spatial_magnitude', save_obj=False,
    for model, i in enumerate(payout):
        net = np.sum(i)
        net_eqk = np.sum(i[ind[0], ind[1]])
        result_i = csep.models.EvaluationResult(
            name='Parimutuel Gambling Score',
            test_distribution=(net, net_eqk),
-            sim_name=Forecasts[model][0].name)
+            sim_name=Forecasts[model].name)
        results.append(result_i)
        print(result_i.sim_name,net, net_eqk)
    if save_obj:
@@ -129,22 +128,6 @@ def plot_merged_batchs(Results1, Results2, ref, savepath=False):
    fig.show()

def run(use_saved=False):
-    # models5 = get_models.get_models_5yr()
-    # models10 = get_models.get_models_10yr()
-    # catalog5 = get_catalogs.filter_5yr_08_2009(get_catalogs.taroni2008())
-    # catalog10 = get_catalogs.filter_10yr_01_2010(get_catalogs.bollettino())
-    #
-    # results5 = pgs(models5, catalog5, bin='spatial_magnitude', ref=11)
-    # plot_pgs(results5, 5, 'MPS04after', savepath=filepaths.csep_figs)
-    # results10 = pgs(models10, catalog10, bin='spatial_magnitude', ref=11)
-    # plot_pgs(results10, 10, 'MPS04after', savepath=filepaths.csep_figs)
-    # plot_merged_batchs(results10, results5, 'MPS04after', savepath=filepaths.csep_figs)
-    #
-    # results5 = pgs(models5, catalog5, bin='spatial_magnitude', ref='avg', save_obj=filepaths.res_path['PGS_5'])
-    # plot_pgs(results5, 5, 'Average', savepath=filepaths.csep_figs)
-    # results10 = pgs(models10, catalog10, bin='spatial_magnitude', ref='avg', save_obj=filepaths.res_path['PGS_10'])
-    # plot_pgs(results10, 10, 'Average', savepath=filepaths.csep_figs)
-    # plot_merged_batchs(results10, results5, 'Average', savepath=filepaths.csep_figs)
    years = [5, 10]
    test = 'PGS'
@@ -160,18 +143,18 @@ def run(use_saved=False):
        result_path = filepaths.get_csep_result_path(test, yr, typ)
        if use_saved:
-            Result = pgs(models[yr],catalogs[yr], ref=typ, load_obj=result_path)
+            Result = pgs(models[yr], catalogs[yr], ref=typ, load_obj=result_path)
        else:
            Result = pgs(models[yr], catalogs[yr], ref=typ, save_obj=result_path)
        Results[(yr, typ)] = Result
-        if typ==int:
+        if typ == int:
            typ = models[yr][typ][0].name
        plot_pgs(Result, yr, typ, savepath=filepaths.get_csep_fig_path(test, yr, 'ref%s'% typ))
    for typ in types:
        plot_merged_batchs(Results[(10, typ)], Results[(5, typ)], typ,
-                           savepath=filepaths.get_csep_fig_path(test, '%i-%i' % (5,10), typ))
+                           savepath=filepaths.get_csep_fig_path(test, '%i-%i' % (5, 10), typ))

if __name__ == '__main__':
    run()
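Note on the hunk above: 'typ == int' is true only when typ is bound to the built-in type object itself, never for an integer index, so 'isinstance(typ, int)' is presumably the intended check; also, 'models[yr][typ][0].name' still uses the old tuple-style indexing that this commit removes elsewhere.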
@@ -64,11 +64,12 @@ def plot_single_results(Results, years, ref_model=0,folder=False):
    plt.show()

-def plot_all_results(Results, years, w_alpha=0.05, order=False, savepath=False):
+def plot_all_results(Results, years, p=0.05, order=False, savepath=False):
    names = [i[0].sim_name[1] for i in Results[0]]
    T_value = np.array([[T_i.observed_statistic for T_i in Ref_model] for Ref_model in Results[0]]).T
-    W_value = np.array([[W_i.quantile for W_i in Ref_model] for Ref_model in Results[1]]).T
+    T_quantile = np.array([[T_i.test_distribution[0] for T_i in Ref_model] for Ref_model in Results[0]]).T
+    W_quantile = np.array([[W_i.quantile for W_i in Ref_model] for Ref_model in Results[1]]).T
    score = np.sum(T_value, axis=1)/T_value.shape[0]
    if order:
@@ -77,8 +78,9 @@ def plot_all_results(Results, years, w_alpha=0.05, order=False, savepath=False):
        arg_ind = np.arange(len(score))
    data_t = T_value[arg_ind, :][:, arg_ind] ## Flip rows/cols if ordered by value
-    data_w = W_value[arg_ind, :][:, arg_ind]
+    data_w = W_quantile[arg_ind, :][:, arg_ind]
+    print(T_quantile)
+    data_tq = T_quantile[arg_ind, :][:, arg_ind]
    fig, ax = plt.subplots(1, 1, figsize=(12,10))
    cmap = sns.diverging_palette(220, 20, as_cmap=True)
@@ -90,8 +92,12 @@ def plot_all_results(Results, years, w_alpha=0.05, order=False, savepath=False):
    for n, i in enumerate(data_w):
        for m, j in enumerate(i):
-            if j < w_alpha:
+            if j < p:
                ax.scatter(n + 0.5, m + 0.5,marker='o', s=2, color='black')
+    for n, i in enumerate(data_tq):
+        for m, j in enumerate(i):
+            if j > 0:
+                ax.scatter(n + 0.5, m + 0.75,marker='o', s=4, color='green')
    plt.text(len(score) + 2.5, -0.2, '$G_t$', fontsize=20)
    for i, j in enumerate([score[i] for i in arg_ind]):
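The added loop addresses the commit's pending note: black dots flag W-test significance (quantile below p), and green dots flag cells where T_quantile is positive; test_distribution[0] is presumably the lower bound of the t-test confidence interval, so a value above zero marks a significant information gain. A self-contained sketch of the same overlay pattern on synthetic data (all names here are illustrative):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(3)
w_quant = rng.uniform(0, 1, size=(6, 6))  # stand-in for the W-test quantile matrix
t_low = rng.normal(0, 1, size=(6, 6))     # stand-in for the t-test lower confidence bound

fig, ax = plt.subplots()
ax.pcolormesh(rng.normal(size=(6, 6)), cmap='coolwarm')
for n, row in enumerate(w_quant):
    for m, val in enumerate(row):
        if val < 0.05:                     # W-test significant at p = 0.05
            ax.scatter(m + 0.5, n + 0.5, marker='o', s=8, color='black')
for n, row in enumerate(t_low):
    for m, val in enumerate(row):
        if val > 0:                        # information gain significantly above zero
            ax.scatter(m + 0.5, n + 0.75, marker='o', s=12, color='green')
plt.show()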
@@ -129,15 +135,15 @@ def run(use_saved=False):
    tw_figs_path = os.path.join(filepaths.csep_figs, 'tw_single')
    os.makedirs(tw_figs_path, exist_ok=True)
-    for yr in years:
-        for model_i in range(len(models[yr])):
-            plot_single_results(Results[yr], years=yr, ref_model=model_i,
-                                folder=tw_figs_path)
+    # for yr in years:
+    #     for model_i in range(len(models[yr])):
+    #         plot_single_results(Results[yr], years=yr, ref_model=model_i,
+    #                             folder=tw_figs_path)
    for yr, typ in itertools.product(years, types):
        plot_all_results(Results[yr], years=yr, order=typ,
                         savepath=filepaths.get_csep_fig_path(test, yr, type=typ*'order'))

if __name__ =='__main__':
-    use_saved=True
+    use_saved = True
    run(use_saved)
@@ -52,7 +52,6 @@ years = [5, 10]
store_years = {i: os.path.join(stored_csep, '%iyr') % i for i in years }
tests = ['N', 'M', 'S', 'L', 'CL', 'T', 'PGS']
-# tests_path = {(i[0], j): os.path.join(i[1], j) for i, j in itertools.product(store_years.items(), tests)}
for i, j in store_years.items():
    os.makedirs(j, exist_ok=True)
@@ -74,11 +73,17 @@ def get_csep_result(test, year, type=False):
    return pickle.load(open(get_csep_result_path(test, year, type), 'rb'))

-# res_path = {'%s_%i' % (test, yr) : os.path.join(stored_csep, '%s_%i.obj' % (test, yr))
-#             for test, yr in itertools.product(tests, years)}

# Figures
figs = os.path.join(results, 'figures')

+## Forecast figures
+forecast_figures = {5: os.path.join(data, 'five-year-models-10.01/figures'),
+                    10: os.path.join(data, 'ten-year-models-10.01/figures')}
+for dir_ in forecast_figures.values():
+    os.makedirs(dir_, exist_ok=True)

+## CSEP figures
csep_figs = os.path.join(figs, 'csep')

def get_csep_fig_path(test, year, type=None):
    if type:
......
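For orientation, the figure-path helper is cut off above; a hypothetical sketch of the pattern it follows (the real body is truncated in this diff, so the filename scheme below is illustrative only):

import os

figs = os.path.join('results', 'figures')   # stands in for the module's figs
csep_figs = os.path.join(figs, 'csep')

def get_csep_fig_path(test, year, type=None):
    # e.g. ('PGS', 5, 'avg') -> results/figures/csep/PGS_5_avg
    if type:
        return os.path.join(csep_figs, '%s_%s_%s' % (test, year, type))
    return os.path.join(csep_figs, '%s_%s' % (test, year))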
import csep
import pickle
import os
import cartopy
+import filepaths
+import get_models

plot_properties = {'grid_labels': True,
                   'borders': True,
@@ -14,25 +15,31 @@ plot_properties = {'grid_labels': True,
                   'projection': cartopy.crs.Mercator()}
-forecasts_dir = '../data/five-year-models-10.01/store/'
-figures_dir = '../data/five-year-models-10.01/figures/'
-for i in os.listdir(forecasts_dir):
-    if i.endswith('obj'):
-        with open(os.path.join(forecasts_dir , i), 'rb') as file_:
-            plot_properties['filename'] = os.path.join(figures_dir, i.replace('.obj', ''))
-            Forecast = pickle.load(file_)
-            plot_properties['title'] = Forecast[1]['name'] + ' ' + Forecast[1]['author']
+forecasts = get_models.get_models_5yr()
+figures_dir = filepaths.forecast_figures[5]
+for model_i in forecasts:
+    plot_properties['filename'] = os.path.join(figures_dir, model_i.name)
+    plot_properties['title'] = model_i.name + ' ' + model_i.author
    print(plot_properties['title'])
-    Forecast[0].plot(plot_args=plot_properties)
+    model_i.plot(plot_args=plot_properties)
-forecasts_dir = '../data/ten-year-models-10.01/store/'
-figures_dir = '../data/ten-year-models-10.01/figures/'
-for i in os.listdir(forecasts_dir):
-    if i.endswith('obj'):
-        with open(os.path.join(forecasts_dir , i), 'rb') as file_:
-            plot_properties['filename'] = os.path.join(figures_dir, i.replace('.obj', ''))
-            Forecast = pickle.load(file_)
-            plot_properties['title'] = Forecast[1]['name'] + ' ' + Forecast[1]['author']
+forecasts = get_models.get_models_10yr()
+figures_dir = filepaths.forecast_figures[10]
+for model_i in forecasts:
+    plot_properties['filename'] = os.path.join(figures_dir, model_i.name)
+    plot_properties['title'] = model_i.name + ' ' + model_i.author
    print(plot_properties['title'])
-    Forecast[0].plot(plot_args=plot_properties)
+    model_i.plot(plot_args=plot_properties)
+# forecasts_dir = '../data/ten-year-models-10.01/store/'
+# figures_dir = '../data/ten-year-models-10.01/figures/'
+# for i in os.listdir(forecasts_dir):
+#     if i.endswith('obj'):
+#         with open(os.path.join(forecasts_dir , i), 'rb') as file_:
+#             plot_properties['filename'] = os.path.join(figures_dir, i.replace('.obj', ''))
+#             Forecast = pickle.load(file_)
+#             plot_properties['title'] = Forecast[1]['name'] + ' ' + Forecast[1]['author']
+#             print(plot_properties['title'])
+#             Forecast[0].plot(plot_args=plot_properties)
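Both loops now assume the forecast objects carry their own metadata instead of the old (forecast, metadata-dict) tuples. A hypothetical sketch of the minimal interface this script relies on (the real class lives in get_models and is not shown in this diff):

from dataclasses import dataclass
import numpy as np

@dataclass
class ModelObject:
    name: str          # used for the figure filename and the title
    author: str        # appended to the title
    data: np.ndarray   # forecast rates, shape (space bins, magnitude bins)

    def plot(self, plot_args=None):
        # presumably delegates to pyCSEP's gridded-forecast plotting
        raise NotImplementedError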
@@ -14,7 +14,7 @@ def norm(array):
    return (array - array.min())/(array.max() -array.min())

-def plot_consistency(ax, tests, model_i):
+def plot_consistency(ax, tests, model_i, color):
    Y_offset = 1.2 * 1.24
    pos = ax.get_figure().transFigure.inverted().transform(
        ax.transData.transform([(i, Y_offset ) for i in ax.get_xticks()]))[model_i]
@@ -23,13 +23,19 @@ def plot_consistency(ax, tests, model_i):
    dh = -0.02
    cx = [pos[0] - sep*r*(len(tests)-1) + 2*sep*r*(i) for i in range(len(tests))]
    for i, ci in zip(tests, cx):
-        artist = patches.Circle((ci, pos[1] + dh), r, fc='green')
+        artist = patches.Circle((ci, pos[1] + dh), r, fc=color)
        ax.get_figure().add_artist(artist)
        ax.get_figure().text(ci, pos[1] + dh, i, ha='center', va='center', color='white')
    return ax
+def plot_all_consistencies(Axes, Results, color='green'):
+    for i, test_pass in enumerate(Results):
+        plot_consistency(Axes, test_pass, i, color=color)
+    return Axes

-def plot_axis(axis,n_results, offset, end_theta, n, min_y, array, yticks, low_bound, color, label, fontsize):
+def plot_axis(axis,n_results, offset, end_theta, n, min_y,
+              array, yticks, low_bound, color, label, fontsize):
    axis_angle = (end_theta + np.deg2rad(offset) + n*(2*np.pi-end_theta - 2*np.deg2rad(offset))/(n_results-1))
    axis.plot([axis_angle, axis_angle], [min_y, 1. + min_y],
@@ -63,7 +69,8 @@ def plot_rticks(axis, min_r, n_r):
    axis.set_ylim([0, 1.0 + min_r])
    return yticks

-def Plot(arrays, colors, result_labels, model_labels, lowbounds, angle_offset=90, offset=10, min_y=0.3,ny=4, fontsize=9):
+def plot_scores(arrays, colors, result_labels, model_labels,
+                lowbounds, angle_offset=90, offset=10, min_y=0.3, ny=4, fontsize=9):
    # N plots
    n_results = len(arrays)
@@ -105,125 +112,146 @@ def Plot(arrays, colors, result_labels, model_labels, lowbounds, angle_offset=90
    return ax
-##### Results 5
-results_all = get_results()
-n_results = 3
-n_models = 19
-### Absolute values plot
-## T_score
-T = np.array([i.observed_statistic for i in results_all['TW_5'][0]]).reshape((n_models, n_models)).T
-t_score = np.sum(T, axis=1) / T.shape[0]
-t_lowcut = 0.1
-t_score[t_score < t_lowcut] = t_lowcut
-## PGS
-pgs = np.array([i.test_distribution[0] for i in results_all['PGS_5']])
-## Conditional Likelihood
-cl_score = np.array([i.quantile for i in results_all['CL_5']])
-## Consistency tests
-alpha = 0.01
-Consistency_Results = []
-for n, m, s in zip(results_all['N_5'], results_all['M_5'], results_all['S_5']):
-    model_cons = []
-    if n.quantile[0] > alpha/2. and n.quantile[1] < 1-alpha/2.:
-        model_cons.append('N')
-    if m.quantile > alpha:
-        model_cons.append('M')
-    if s.quantile > alpha:
-        model_cons.append('S')
-    Consistency_Results.append(model_cons)
-## Order results in terms of ~average performance for visualization purpose
-names = [i.sim_name for i in results_all['CL_5']]
-order = np.flip(np.argsort(norm(cl_score)+norm(pgs)+norm(t_score)))
-pgs = pgs[order]
-cl_score = cl_score[order]
-t_score = t_score[order]
-model_names = [names[i] for i in order]
-Consistency_Results = [Consistency_Results[i] for i in order]
-## Complete array
-colors = ['orangered','darkgreen','steelblue']
-Axes = Plot([t_score, pgs, cl_score], colors, ['IG', r'$\Delta R_j$', r'$\delta_{CL}$'],
-            model_names, angle_offset=90, offset=5, min_y=0.3,ny=4, lowbounds = [True, True, False])
-for i, test_pass in enumerate(Consistency_Results):
-    plot_consistency(Axes, test_pass, i)
-legend_elements = [Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[0]),
-                          lw=5, label='T-test score'),
+def plot_legends(Axes, colors, labels):
+    legend_elements = [Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[0]),
+                              lw=5, label=labels[0]),
                       Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[1]),
-                              lw=5, label='PG score'),
+                              lw=5, label=labels[1]),
                       Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[2]),
-                              lw=5, label='CL-test score'),
-                   Line2D([0], [0], color='green',
-                          lw=0, marker='o', markersize=15, label='Consistency tests')]
-Axes.get_figure().legend(handles=legend_elements, loc='lower right')
-Axes.set_title('Combined test results', pad=15, fontsize=14)
-plt.show()
-##### Results10
-results_all = get_results(10)
-n_results = 3
-n_models = 19
-### Absolute values plot
-## T_score
-T = np.array([i.observed_statistic for i in results_all['TW_10'][0]]).reshape((n_models, n_models)).T
-t_score = np.sum(T, axis=1) / T.shape[0]
-t_lowcut = 0.1
-t_score[t_score < t_lowcut] = t_lowcut
-## PGS
-pgs = np.array([i.test_distribution[0] for i in results_all['PGS_10']])
-## Conditional Likelihood
-cl_score = np.array([i.quantile for i in results_all['CL_10']])
-## Consistency tests
-alpha = 0.01
-Consistency_Results = []
-for n, m, s in zip(results_all['N_10'], results_all['M_10'], results_all['S_10']):
+                              lw=5, label=labels[2]),
+                       Line2D([0], [0], color=colors[3],
+                              lw=0, marker='o', markersize=15, label=labels[3])]
+    Axes.get_figure().legend(handles=legend_elements, loc='lower right')
+    return Axes
+def plot_results(years, labels, ll_type=None, n_eqk = 1,
+                 ref='avg', p=0.01,l_cl = 'L' , lowcuts=[False, False, False], show=True,
+                 savepath=None):
+    ### Information Gain
+    T = np.array([[T_i.observed_statistic for T_i in Ref_model] for
+                  Ref_model in filepaths.get_csep_result('T', 5)[0]]).T
+    if ref == 'avg':
+        t_score = np.sum(T, axis=1) / T.shape[0]
+    else:
+        t_score = T[:, ref]
+    if lowcuts[1]:
+        t_score[t_score < lowcuts[1]] = lowcuts[1]
+    ### Parimutuel Gambling score
+    PGS = filepaths.get_csep_result('PGS', years, ref)
+    pgs =np.array([i.test_distribution[0] for i in PGS]) ## << PGS in all bins
+    if lowcuts[0]:
+        pgs[pgs < lowcuts[0]] = lowcuts[0] # << For visualization purposes
+    ## Conditional Likelihood
+    LL = filepaths.get_csep_result(l_cl, years)
+    if ll_type == 'quantile':
+        ll_score = np.array([i.quantile for i in LL])
+        ll_label = r'$\delta_{%s}$' % l_cl
+    else:
+        ll_score = np.array([i.observed_statistic for i in LL])
+        ll_label = r'$\log({%s})$' % l_cl
+    if isinstance(ref, int):
+        ll_score = (ll_score - ll_score[ref])/n_eqk
+    if lowcuts[2]:
+        ll_score[ll_score < lowcuts[2]] = lowcuts[2]
+    ## Consistency tests
+    Consistency_Results = []
+    for n, m, s in zip(filepaths.get_csep_result('N', years),
+                       filepaths.get_csep_result('M', years),
+                       filepaths.get_csep_result('S', years)):
        model_cons = []
-        if n.quantile[0] > alpha/2. and n.quantile[1] < 1-alpha/2.:
+        if n.quantile[0] > p/2. and n.quantile[1] < 1-p/2.:
            model_cons.append('N')
-        if m.quantile > alpha:
+        if m.quantile > p:
            model_cons.append('M')
-        if s.quantile > alpha:
+        if s.quantile > p:
            model_cons.append('S')
        Consistency_Results.append(model_cons)
-## Order results in terms of ~average performance for visualization purpose
-names = [i.sim_name for i in results_all['CL_10']]
-order = np.flip(np.argsort(norm(cl_score)+norm(pgs)+norm(t_score)))
-pgs = pgs[order]
-cl_score = cl_score[order]
-t_score = t_score[order]
-model_names = [names[i] for i in order]
-Consistency_Results = [Consistency_Results[i] for i in order]
-## Complete array
-colors = ['orangered','darkgreen','steelblue']
-Axes = Plot([t_score, pgs, cl_score], colors, ['IG', r'$\Delta R_j$', r'$\delta_{CL}$'],
-            model_names, angle_offset=90, offset=5, min_y=0.3,ny=4, lowbounds = [True, True, False])
-for i, test_pass in enumerate(Consistency_Results):
-    plot_consistency(Axes, test_pass, i)
-legend_elements = [Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[0]),
-                          lw=5, label='T-test score'),
-                   Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[1]),
-                          lw=5, label='PG score'),
-                   Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[2]),
-                          lw=5, label='CL-test score'),
-                   Line2D([0], [0], color='green',
-                          lw=0, marker='o', markersize=15, label='Consistency tests')]
-Axes.get_figure().legend(handles=legend_elements, loc='lower right')
-Axes.set_title('Combined test results', pad=15, fontsize=14)
-plt.show()
+    ## Get Model Names
+    if isinstance(ref, int):
+        ref_name = LL[ref].sim_name
+    else:
+        ref_name = 'Combined'
+    names = [i.sim_name for i in LL]
+    ## Remove reference model
+    if isinstance(ref, int):
+        t_score = np.delete(t_score, ref)
+        pgs = np.delete(pgs, ref)
+        ll_score = np.delete(ll_score, ref)
+        Consistency_Results.pop(ref)
+        names.pop(ref)
+    ## Order results in terms of ~average performance for visualization purpose
+    order_val = norm(ll_score) + norm(pgs) + norm(t_score)
+    order = np.flip(np.argsort(order_val))
+    pgs = pgs[order]
+    ll_score = ll_score[order]
+    t_score = t_score[order]
+    model_names = [names[i] for i in order]
+    Consistency_Results = [Consistency_Results[i] for i in order]
+    ## Complete array
+    colors = ['orangered', 'darkgreen', 'steelblue', 'green']
+    Axes = plot_scores([t_score, pgs, ll_score], colors[:3], ['IG', r'$\Delta R_j$', ll_label],
+                       model_names, angle_offset=90, offset=5, min_y=0.3, ny=4,
+                       lowbounds = [bool(i) for i in lowcuts])
+    Axes = plot_all_consistencies(Axes, Consistency_Results, color=colors[3])
+    Axes = plot_legends(Axes, colors, labels)
+    Axes.set_title('Test results - %i years (%s)' % (years, ref_name) , pad=15, fontsize=14)
+    if savepath:
+        plt.savefig(savepath, dpi=300)
+    plt.show()
+if __name__ == '__main__':
+    p = 0.05
+    N_5 = 14
+    N_10 = 21
+    #### IG, PGS, Conditional Log-Likelihood
+    labels = ['Information Gain (t-test)', 'Net Return (PGS)',
+              '$\log(L_N)$ quantile', 'Poisson Consistency $(p=%.2f)$' % p]
+    plot_results(5, l_cl='CL', ll_type='quantile', labels=labels, ref='avg', lowcuts=[False, 0.1, False],
+                 savepath=filepaths.get_csep_fig_path('Total', 5, 'avg_CLq'))
+    plot_results(10,l_cl='CL', ll_type='quantile', labels=labels, ref='avg', lowcuts=[False, 0.1, False],
+                 savepath=filepaths.get_csep_fig_path('Total', 10, 'avg_CLq'))
+    labels = ['Information Gain (t-test)', 'Net Return (PGS)',
+              '$\log(L_N)$ - $\log(L_N)_{ref}$', 'Poisson Consistency $(p=%.2f)$' % p]
+    plot_results(5, ref=11, l_cl='CL', labels=labels, lowcuts=[False, -1, -15],
+                 savepath=filepaths.get_csep_fig_path('Total', 5, 'ref11_CL'))
+    plot_results(10, ref=11, l_cl='CL', labels=labels, lowcuts=[False, -2, -20],
+                 savepath=filepaths.get_csep_fig_path('Total', 10, 'ref11_CL'))
+    #### IG, PGS, Log-Likelihood
+    labels = ['Information Gain (t-test)', 'Net Return (PGS)',
+              '$\log(L)$ quantile', 'Poisson Consistency $(p=%.2f)$' % p]
+    plot_results(5, l_cl='L', ll_type='quantile', labels=labels, ref='avg', lowcuts=[False, 0.1, False],
+                 savepath=filepaths.get_csep_fig_path('Total', 5, 'avg_Lq'))
+    plot_results(10,l_cl='L', ll_type='quantile', labels=labels, ref='avg', lowcuts=[False, -1, False],
+                 savepath=filepaths.get_csep_fig_path('Total', 10, 'avg_Lq'))
+    labels = ['Information Gain (t-test)', 'Net Return (PGS)',
+              '$\log(L)$ - $\log(L)_{ref}$', 'Poisson Consistency $(p=%.2f)$' % p]
+    plot_results(5, ref=11, l_cl='L', labels=labels, lowcuts=[False, -1, -15],
+                 savepath=filepaths.get_csep_fig_path('Total', 5, 'ref11_L'))
+    plot_results(10, ref=11, l_cl='L', labels=labels, lowcuts=[False, -1, -15],
+                 savepath=filepaths.get_csep_fig_path('Total', 10, 'ref11_L'))
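The densest few lines of the new plot_results are the consistency gate. A minimal sketch of just that logic, using SimpleNamespace stand-ins for the pyCSEP result objects (the N-test carries a (low, high) quantile pair, the M- and S-tests a scalar quantile):

from types import SimpleNamespace

p = 0.01
n_res = SimpleNamespace(quantile=(0.12, 0.88))  # two-sided N-test quantiles
m_res = SimpleNamespace(quantile=0.30)          # one-sided M-test quantile
s_res = SimpleNamespace(quantile=0.002)         # one-sided S-test quantile

passed = []
# N-test: the observed event count must fall inside the central 1 - p interval
if n_res.quantile[0] > p/2. and n_res.quantile[1] < 1 - p/2.:
    passed.append('N')
# M- and S-tests: fail only when the quantile drops below p
if m_res.quantile > p:
    passed.append('M')
if s_res.quantile > p:
    passed.append('S')
print(passed)  # ['N', 'M'] -- the S-test fails at p = 0.01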