Commit 0af670d8 authored by Pablo Iturrieta Rebolledo's avatar Pablo Iturrieta Rebolledo

updated Brier score implementation and multi-score figure

parent feeb9a95
@@ -2,12 +2,9 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8 (venv)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.7 (venv)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="renderExternalDocumentation" value="true" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
@@ -13,20 +13,6 @@ import os
from codes import get_catalogs, get_models, paths
-def mod_bessel0(lambd, iter):
-    I_0 = 0
-    for i in range(iter):
-        I_0 += (lambd**2 / 4) ** (i) / \
-               (scipy.special.factorial(i))**2
-    return I_0
-#
-def p_series(lambd, iter = 10):
-    p2 = np.exp(-2*lambd) * mod_bessel0(lambd, iter)
-    return p2
def p_series(lambd, iter = 100):
p2 = 0
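Reviewer note: the quantity p_series approximates, the sum of squared Poisson
probabilities used in brier_score below, has the closed form
sum_k P(k; lam)^2 = exp(-2*lam) * I0(2*lam), where I0 is the modified Bessel
function of order zero, so a truncated implementation can be sanity-checked
against scipy:

    import numpy as np
    import scipy.special
    import scipy.stats

    lam = 3.0
    series = sum(scipy.stats.poisson.pmf(k, lam) ** 2 for k in range(100))
    closed_form = np.exp(-2 * lam) * scipy.special.i0(2 * lam)
    assert np.isclose(series, closed_form)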
@@ -44,52 +30,76 @@ def brier_score(Forecast, Catalog, spatial_only=True, binary=True):
data = Forecast.data
obs = Catalog.spatial_magnitude_counts()
brier = 0
if binary:
obs = (obs >= 1).astype(int)
prob_success = 1 - scipy.stats.poisson.cdf(0, data)
+        brier = []
for p, o in zip(prob_success.ravel(), obs.ravel()):
if o == 0:
-                brier += -2 * p**2
+                brier.append(-2 * p**2)
else:
-                brier += -2 * (p - 1)**2
+                brier.append(-2 * (p - 1)**2)
+        brier = np.sum(brier)
else:
prob_success = scipy.stats.poisson.pmf(obs, data)
brier = 2 * (prob_success) - p_series(data) - 1
brier = np.sum(brier)
-    plt.show()
for n_dim in obs.shape:
brier /= n_dim
return brier
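Reviewer note: per cell, the binary branch accumulates -2*(p - o)**2, a
rescaled Brier score on the event {N >= 1}, while the count branch uses the
quadratic score B = 2*P(N = n_obs) - sum_k P(N = k)**2 - 1, which is strictly
negative for a Poisson forecast (zero would require all probability mass on
n_obs). A minimal one-cell check of the count version, using the closed form
for the squared-probability sum:

    import numpy as np
    import scipy.special
    import scipy.stats

    lam, n_obs = 2.5, 3
    pmf_sq_sum = np.exp(-2 * lam) * scipy.special.i0(2 * lam)  # sum_k P(k)**2
    brier_cell = 2 * scipy.stats.poisson.pmf(n_obs, lam) - pmf_sq_sum - 1
    print(brier_cell)  # always < 0 here; less negative is better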
def brier_score_batch(Forecasts, Catalog, save_obj):
results = []
for forecast in Forecasts:
global namess
namess = forecast.name
brier_spat_bin = brier_score(forecast, Catalog, spatial_only=True, binary=True)
brier_spat_nonbin = brier_score(forecast, Catalog, spatial_only=True, binary=False)
brier_spatmag_bin = brier_score(forecast, Catalog, spatial_only=False, binary=True)
brier_spatmag_nonbin = brier_score(forecast, Catalog, spatial_only=False, binary=False)
result_i = csep.models.EvaluationResult(
name='Brier score',
test_distribution=(brier_spat_bin, brier_spat_nonbin,
brier_spatmag_bin, brier_spatmag_nonbin),
sim_name=forecast.name)
results.append(result_i)
index = np.argsort([i.test_distribution[2] for i in results])
for i in index:
print(results[i].sim_name, results[i].test_distribution[2])
if save_obj:
with open(save_obj, 'wb') as obj:
pickle.dump(results, obj)
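Reviewer note: a minimal sketch of reading one of the pickled result lists
back, reusing the repo's paths helper from the call below; the 'bollettino'
and 10-year arguments are only illustrative:

    import pickle
    from codes import paths

    with open(paths.get_csep_result_path('B', 10, 'bollettino'), 'rb') as obj:
        results = pickle.load(obj)
    for r in results:
        # test_distribution = (spat_bin, spat_nonbin, spatmag_bin, spatmag_nonbin)
        print(r.sim_name, r.test_distribution[2])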
if __name__ == '__main__':
# use_saved = True
# forecast1 = get_models.ten_years()[1]
# forecast2 = get_models.ten_years()[5]
# forecast_ref = get_models.ten_years()[11]
#
catalog = get_catalogs.filter_10yr_01_2010(get_catalogs.bollettino())
# with open('trial.obj', 'wb') as file_:
# pickle.dump((forecast1, forecast2, forecast_ref, catalog), file_)
# with open('trial.obj', 'rb') as file_:
# forecast1, forecast2, forecast_ref, catalog = pickle.load(file_)
-    brier = []
-    name = []
-    for model in get_models.ten_years():
-        br = brier_score(model, catalog, binary=False, spatial_only=False)
-        brier.append(br)
-        name.append(model.name)
-    ind = np.argsort(brier)
-    brier = np.sort(brier)
-    name = [name[i] for i in ind]
-    for i, j in zip(brier, name):
-        print(i, j)
forecasts = get_models.ten_years()
catalogs = ['bollettino', 'emrcmt', 'horus']
for cat_name in catalogs:
if cat_name == 'bollettino':
cat = get_catalogs.bollettino()
elif cat_name == 'horus':
cat = get_catalogs.horus()
elif cat_name == 'emrcmt':
cat = get_catalogs.emrcmt()
cat = get_catalogs.filter_10yr_01_2010(cat)
brier_score_batch(forecasts, cat,
save_obj=paths.get_csep_result_path('B', 10, cat_name))
    # Indices where LTST has very large forecast values
# global indexx
# indexx = np.array([[7249 , 0],
# [7249 , 1],
# [7249 , 2],
# [7249 ,3],
# [7249 , 4],
# [7249 , 5],
# [7250 , 0],
# [7250 , 1]])
\ No newline at end of file
@@ -7,13 +7,10 @@ import numpy as np
import matplotlib.patches as patches
from matplotlib.lines import Line2D
-def get_results(years):
-    return {(yr, test): pickle.load(open(item, 'rb')) for key, item in paths.res_path.items()}
def norm(array):
return (array - array.min())/(array.max() -array.min())
def plot_consistency(ax, tests, model_i, color):
Y_offset = 1.2 * 1.24
pos = ax.get_figure().transFigure.inverted().transform(
@@ -29,20 +26,19 @@ def plot_consistency(ax, tests, model_i, color):
return ax
def plot_all_consistencies(Axes, Results, color='green'):
for i, test_pass in enumerate(Results):
plot_consistency(Axes, test_pass, i, color=color)
return Axes
def plot_axis(axis,n_results, offset, end_theta, n, min_y,
-              array, yticks, low_bound, color, label, fontsize):
+              array, yticks, low_bound, color, label, fontsize, format):
axis_angle = (end_theta + np.deg2rad(offset) + n*(2*np.pi-end_theta - 2*np.deg2rad(offset))/(n_results-1))
axis.plot([axis_angle, axis_angle], [min_y, 1. + min_y],
color=color, linewidth=2, linestyle=':', alpha=0.5)
-    for i in yticks:
+    for i, j in zip(yticks, format):
        val = (i - min_y) * (array.max() - array.min()) + array.min()
-        val = '%.2f' % val
+        val = j % val
if low_bound and i == yticks[0]:
val = '<'+ val
axis.text(axis_angle, i - 0.03, val,
@@ -70,7 +66,7 @@ def plot_rticks(axis, min_r, n_r):
return yticks
def plot_scores(arrays, colors, result_labels, model_labels,
-                lowbounds, angle_offset=90, offset=10, min_y=0.3, ny=4, fontsize=9):
+                lowbounds, format, angle_offset=90, offset=10, min_y=0.3, ny=4, fontsize=9):
# N plots
n_results = len(arrays)
@@ -108,7 +104,7 @@ def plot_scores(arrays, colors, result_labels, model_labels,
yticks = plot_rticks(ax, min_y, ny)
for i in range(n_results):
plot_axis(ax,n_results, offset, end_theta, i, min_y, arrays[i], yticks, lowbounds[i],
-                  color_array[i], result_labels[i], fontsize)
+                  color_array[i], result_labels[i], fontsize, format)
return ax
@@ -119,17 +115,22 @@ def plot_legends(Axes, colors, labels):
lw=5, label=labels[1]),
Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[2]),
lw=5, label=labels[2]),
-                       Line2D([0], [0], color=colors[3],
-                              lw=0, marker='o', markersize=15, label=labels[3])]
+                       Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[3]),
+                              lw=5, label=labels[3]),
+                       Line2D([0], [0], color=colors[4],
+                              lw=0, marker='o', markersize=15, label=labels[4])]
Axes.get_figure().legend(handles=legend_elements, loc='lower right')
return Axes
-def plot_results(years, labels, ll_type=None, n_eqk = 1,
-                 ref='avg', p=0.01, l_cl = 'L', lowcuts=[False, False, False], show=True,
+def plot_results(years, labels, format='%.2f', ll_type=None, n_eqk = 1,
+                 ref='avg', p=0.01, l_cl = 'L', lowcuts=False, show=True,
savepath=None, catalog=False):
if isinstance(format, str):
format = [format] * len(labels)
if lowcuts is False:
lowcuts = [lowcuts] * len(labels)
### Information Gain
T = np.array([[T_i.observed_statistic for T_i in Ref_model] for
Ref_model in paths.get_csep_result('T', years, catalog)[0]]).T
@@ -203,7 +204,7 @@ def plot_results(years, labels, ll_type=None, n_eqk = 1,
colors = ['orangered', 'darkgreen', 'steelblue', 'green']
Axes = plot_scores([t_score, pgs, ll_score], colors[:3], ['IG', r'$\Delta R_j$', ll_label],
-                       model_names, angle_offset=90, offset=5, min_y=0.3, ny=4,
+                       model_names, format=format, angle_offset=90, offset=5, min_y=0.3, ny=4,
lowbounds = [bool(i) for i in lowcuts])
Axes = plot_all_consistencies(Axes, Consistency_Results, color=colors[3])
@@ -215,6 +216,94 @@ def plot_results(years, labels, ll_type=None, n_eqk = 1,
plt.show()
def plot_results2(years,labels, p=0.01, lowcuts=False, show=True,
format='%.2f', savepath=None, catalog=False):
if isinstance(format, str):
format = [format] * len(labels)
if lowcuts is False:
lowcuts = [lowcuts] * len(labels)
## Log-Likelihood
LL = paths.get_csep_result('L', years, catalog)
ll_score = np.array([i.observed_statistic for i in LL])
ll_label = r'$\mathcal{L}$'
    # bool is a subclass of int, so exclude it explicitly; otherwise the
    # default lowcuts=False would be applied as a numeric floor of 0
    if isinstance(lowcuts[0], (float, int)) and not isinstance(lowcuts[0], bool):
        ll_score[ll_score < lowcuts[0]] = lowcuts[0]
## Conditional Log-Likelihood
CL = paths.get_csep_result('CL', years, catalog)
cl_score = np.array([i.observed_statistic for i in CL])
    cl_label = r'$\mathcal{L}_{\|N}$'
    if isinstance(lowcuts[1], (float, int)) and not isinstance(lowcuts[1], bool):
        cl_score[cl_score < lowcuts[1]] = lowcuts[1]
## Brier score
Brier_results = paths.get_csep_result('B', years, catalog)
Brier = np.array([i.test_distribution[3] for i in Brier_results])
brier_label = r'$\mathcal{B}$'
    if isinstance(lowcuts[2], (float, int)) and not isinstance(lowcuts[2], bool):
        Brier[Brier < lowcuts[2]] = lowcuts[2]
brier_bin_label = r'$\mathcal{B}_{\mathrm{bin}}$'
Brier_bin = np.array([i.test_distribution[2] for i in Brier_results])
    if isinstance(lowcuts[3], (float, int)) and not isinstance(lowcuts[3], bool):
        Brier_bin[Brier_bin < lowcuts[3]] = lowcuts[3]
## Consistency tests
Consistency_Results = []
for n, m, s in zip(paths.get_csep_result('N', years, catalog),
paths.get_csep_result('M', years, catalog),
paths.get_csep_result('S', years, catalog)):
model_cons = []
if n.quantile[0] > p/2. and n.quantile[1] < 1-p/2.:
model_cons.append('N')
if m.quantile > p:
model_cons.append('M')
if s.quantile > p:
model_cons.append('S')
Consistency_Results.append(model_cons)
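    # note: the N-test is applied two-sided at level p (both tails of the
    # expected-number distribution are checked), while the M- and S-tests are
    # one-sided and pass whenever the observed quantile exceeds p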
ref_name = 'All models'
names = [i.sim_name for i in LL]
    ## Order results in terms of ~average performance for visualization purposes
order_val = norm(ll_score) + norm(cl_score) + norm(Brier_bin) + norm(Brier)
order = np.flip(np.argsort(order_val))
ll_score = ll_score[order]
cl_score = cl_score[order]
Brier = Brier[order]
Brier_bin = Brier_bin[order]
model_names = [names[i] for i in order]
Consistency_Results = [Consistency_Results[i] for i in order]
## Complete array
colors = ['darkorange', 'orange', 'steelblue', 'blue', 'green']
Axes = plot_scores([ll_score, cl_score, Brier, Brier_bin],
colors[:4],
[ll_label, cl_label, brier_label, brier_bin_label],
model_names,format=format, angle_offset=90, offset=5, min_y=0.3, ny=4,
lowbounds = [bool(i) for i in lowcuts])
print(lowcuts)
Axes = plot_all_consistencies(Axes, Consistency_Results, color=colors[4])
Axes = plot_legends(Axes, colors, labels)
Axes.set_title('Test results - %i years (%s)' % (years, ref_name) , pad=15, fontsize=14)
# if savepath:
# plt.savefig(savepath, dpi=300, transparent=True)
plt.show()
if __name__ == '__main__':
p = 0.05
N_5 = 14
@@ -224,7 +313,7 @@ if __name__ == '__main__':
catalogs = ['bollettino', 'horus', 'emrcmt']
-    for cat in catalogs:
+    for cat in catalogs[:1]:
# labels = ['Information Gain (t-test)', 'Net Return (PGS)',
# '$\log(L_N)$ quantile', 'Poisson Consistency $(p=%.2f)$' % p]
# plot_results(5, l_cl='CL', ll_type='quantile', labels=labels, ref='avg', lowcuts=[False, 0.1, False],
@@ -233,14 +322,26 @@ if __name__ == '__main__':
# plot_results(10, l_cl='CL', ll_type='quantile', labels=labels, ref='avg', lowcuts=[False, 0.1, False],
# savepath=paths.get_csep_figpath('Total', 10, 'avg_CLq'))
-        labels = ['Information Gain (t-test)', 'Net Return (PGS)',
-                  '$\log(L_N)$ - $\log(L_N)_{ref}$', 'Poisson Consistency $(p=%.2f)$' % p]
-        plot_results(5, ref=11, l_cl='CL', labels=labels, lowcuts=[False, -1, -15],
-                     savepath=paths.get_csep_figpath('Total', 5, 'ref11_CL_%s' % cat), catalog=cat)
-        plot_results(10, ref=11, l_cl='CL', labels=labels, lowcuts=[False, -2, -20],
-                     savepath=paths.get_csep_figpath('Total', 10, 'ref11_CL_%s' % cat), catalog=cat)
#### Final figures
labels = ['Log-Likelihood $\mathcal{L}$',
'Conditional Log-Likelihood $\mathcal{L}_{\|N}$',
'Brier score $\mathcal{B}$',
'Binary Brier score $\mathcal{B}_{\mathrm{bin}}$',
'Poisson Consistency $(p=%.2f)$' % p]
# format+
# plot_results(5, ref=11, l_cl='CL', labels=labels, lowcuts=[False, -1, -15],
# savepath=paths.get_csep_figpath('Total', 5, 'ref11_CL_%s' % cat), catalog=cat)
# plot_results(10, ref=11, l_cl='CL', labels=labels, lowcuts=[False, -2, -20],
# savepath=paths.get_csep_figpath('Total', 10, 'ref11_CL_%s' % cat), catalog=cat)
plot_results2(10,
labels=labels,
format=['%i', '%i', '%.3e', '%.3e'],
lowcuts=[-195, -195, -0.0001086, -0.0001086],
savepath=paths.get_csep_figpath('Scores', 10, 'Total'),
catalog=cat)
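        # reviewer note: integer tick labels for the two log-likelihood axes
        # and scientific notation for the two Brier axes; the paired lowcuts
        # put each pair of like-scaled axes on a shared lower bound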
####
#### IG, PGS, Log-Likelihood
# labels = ['Information Gain (t-test)', 'Net Return (PGS)',
...
from codes import get_catalogs, get_models, paths
import pickle
import itertools
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as patches
from matplotlib.lines import Line2D
def sig_exp(num_str):
parts = num_str.split('.', 2)
decimal = parts[1] if len(parts) > 1 else ''
exp = -len(decimal)
digits = parts[0].lstrip('0') + decimal
trimmed = digits.rstrip('0')
exp += len(digits) - len(trimmed)
sig = int(trimmed) if trimmed else 0
return sig, exp
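# Usage note (reviewer illustration): sig_exp splits a plain decimal string
# into an integer significand and a base-ten exponent, e.g.
#     sig_exp('0.000108')  ->  (108, -6)    # 108e-6 == 0.000108
#     sig_exp('12.500')    ->  (125, -1)    # 125e-1 == 12.5
# Inputs in scientific notation (e.g. '1e-05') are not handled, and plot_axis
# below currently displays only the significand, discarding the exponent.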
def norm(array):
return (array - array.min())/(array.max() -array.min())
def plot_consistency(ax, tests, model_i, color):
Y_offset = 1.2 * 1.24
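    # map each model's x-tick position from data coordinates to figure
    # coordinates so the pass/fail circles can be added as figure-level artists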
pos = ax.get_figure().transFigure.inverted().transform(
ax.transData.transform([(i, Y_offset ) for i in ax.get_xticks()]))[model_i]
r = 0.01
sep =1.2
dh = -0.02
cx = [pos[0] - sep*r*(len(tests)-1) + 2*sep*r*(i) for i in range(len(tests))]
for i, ci in zip(tests, cx):
artist = patches.Circle((ci, pos[1] + dh), r, fc=color)
ax.get_figure().add_artist(artist)
ax.get_figure().text(ci, pos[1] + dh, i, ha='center', va='center', color='white')
return ax
def plot_all_consistencies(Axes, Results, color='green'):
for i, test_pass in enumerate(Results):
plot_consistency(Axes, test_pass, i, color=color)
return Axes
def plot_axis(axis, n_results, offset, end_theta, n, min_y,
array, yticks, low_bound, color, label, fontsize, format):
axis_angle = (end_theta + np.deg2rad(offset) + n*(2*np.pi-end_theta - 2*np.deg2rad(offset))/(n_results-1))
axis.plot([axis_angle, axis_angle], [min_y, 1. + min_y],
color=color, linewidth=2, linestyle=':', alpha=0.5)
for i in yticks:
val = (i - min_y) * (array.max() - array.min()) + array.min()
if 'e' in format:
# val = format % val
print(val)
val, exp = sig_exp(str(val))
val = str(val)
else:
val = format % val
if low_bound and i == yticks[0]:
val = '<' + val
axis.text(axis_angle, i - 0.03, val,
rotation=np.rad2deg(axis_angle) + 90, color=color,
ha='center', va='center', fontsize=fontsize)
# axis.text(axis_angle, i + 0.15, label, color=color,
# ha='center', va='center', fontsize=14)
def plot_theta_seps(axis, name_models, end_angle, n_model, n_results, width_model,min_y=0):
for i in np.linspace(0.0, end_angle, n_model + 1): # ygrid separators
axis.plot([i , i],
[min_y, 1. + min_y], color='grey', linewidth=0.5)
axis.get_xaxis().set_ticks(np.linspace(0.0, end_angle , n_model,
endpoint=False) + n_results/2*width_model)
axis.get_xaxis().set_ticklabels(name_models)
axis.tick_params(axis='x', which='major', pad=20)
def plot_rticks(axis, min_r, n_r):
axis.get_yaxis().grid(zorder=0, label=False)
yticks = [min_r]
yticks.extend([i + min_r for i in np.linspace(0,1,n_r+1)])
axis.get_yaxis().set_ticks(yticks)
axis.get_yaxis().set_ticklabels([])
axis.set_ylim([0, 1.0 + min_r])
return yticks
def plot_scores(arrays, colors, result_labels, model_labels,
lowbounds, format, angle_offset=90, offset=10, min_y=0.3, ny=4, fontsize=9):
# N plots
n_results = len(arrays)
n_models = len(arrays[0])
# Plot properties
figsize = (8,8)
end_theta = 2 * np.pi - angle_offset / 360 * 2 * np.pi
width = end_theta/n_models/n_results
theta = np.linspace(0.0, end_theta, n_results * n_models, endpoint=False)
    # Rearrange results and colors
tuple_arrays = tuple([norm(i) for i in arrays])
score_total = np.vstack(tuple_arrays).T.ravel()
color_array = [plt.cm.colors.to_rgb(i) for i in colors]
tuple_colors = tuple([[color for i in range(n_models)] for color in color_array])
colors_full = np.hstack(tuple_colors).reshape((n_models*n_results,3))
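    # score_total is model-major (all of model 0's results, then model 1's,
    # and so on); the hstack/reshape keeps colors_full aligned with that order.
    # Each array was min-max normalised by norm(), so bar lengths compare
    # models within one score, not across scores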
# Create figure
ax = plt.subplot(111,projection='polar')
fig = ax.get_figure()
fig.set_size_inches(figsize)
ax.grid(False)
# Plot Data
ax.bar(theta + width/2., score_total, width=width, bottom=min_y,
color=colors_full, alpha=1, zorder=0)
# Plot shaded region
ax.bar(np.pi+end_theta/2, 1 + min_y, width= 2*np.pi - end_theta, color='grey', alpha=0.2, bottom=min_y)
ax.bar(0, min_y, width= 2*np.pi , color='grey', alpha=0.2)
# Plot auxiliaries
plot_theta_seps(ax, model_labels, end_theta, n_models,n_results, width, min_y=min_y)
yticks = plot_rticks(ax, min_y, ny)
# todo make generic
print(angle_offset)
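    # only result axes 0 and 2 receive tick labels: with n_results=2 passed to
    # plot_axis, one labeled axis serves each pair of like-scaled scores
    # (L/CL and the two Brier variants in the calling code), hence the todo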
for k, i in enumerate([0, 2]):
plot_axis(ax, 2, offset, end_theta, k, min_y, arrays[i], yticks, lowbounds[i],
color_array[i], result_labels[i], fontsize, format[i])
return ax
def plot_legends(Axes, colors, labels):
legend_elements = [Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[0]),
lw=5, label=labels[0]),
Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[1]),
lw=5, label=labels[1]),
Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[2]),
lw=5, label=labels[2]),
Line2D([0], [0], color=plt.cm.colors.to_rgb(colors[3]),
lw=5, label=labels[3]),
Line2D([0], [0], color=colors[4],
lw=0, marker='o', markersize=15, label=labels[4])]
Axes.get_figure().legend(handles=legend_elements, loc='lower right')
return Axes
def plot_results(years, labels, p=0.01, lowcuts=False, show=True,
format='%.2f', savepath=None, catalog=False):
if isinstance(format, str):
format = [format] * len(labels)
if lowcuts is False:
lowcuts = [lowcuts] * len(labels)
## Log-Likelihood
LL = paths.get_csep_result('L', years, catalog)
ll_score = np.array([i.observed_statistic for i in LL])
ll_label = r'$\mathcal{L}$'
    # bool is a subclass of int, so exclude it explicitly; otherwise the
    # default lowcuts=False would be applied as a numeric floor of 0
    if isinstance(lowcuts[0], (float, int)) and not isinstance(lowcuts[0], bool):
        ll_score[ll_score < lowcuts[0]] = lowcuts[0]
## Conditional Log-Likelihood
CL = paths.get_csep_result('CL', years, catalog)
cl_score = np.array([i.observed_statistic for i in CL])
    cl_label = r'$\mathcal{L}_{\|N}$'
    if isinstance(lowcuts[1], (float, int)) and not isinstance(lowcuts[1], bool):
        cl_score[cl_score < lowcuts[1]] = lowcuts[1]
## Brier score
Brier_results = paths.get_csep_result('B', years, catalog)
Brier = np.array([i.test_distribution[3] for i in Brier_results])
print(Brier)
brier_label = r'$\mathcal{B}$'
    if isinstance(lowcuts[2], (float, int)) and not isinstance(lowcuts[2], bool):
        Brier[Brier < lowcuts[2]] = lowcuts[2]
brier_bin_label = r'$\mathcal{B}_{\mathrm{bin}}$'
Brier_bin = np.array([i.test_distribution[2] for i in Brier_results])
    if isinstance(lowcuts[3], (float, int)) and not isinstance(lowcuts[3], bool):
        Brier_bin[Brier_bin < lowcuts[3]] = lowcuts[3]
## Consistency tests
Consistency_Results = []
for n, m, s in zip(paths.get_csep_result('N', years, catalog),
paths.get_csep_result('M', years, catalog),
paths.get_csep_result('S', years, catalog)):
model_cons = []
if n.quantile[0] > p/2. and n.quantile[1] < 1-p/2.:
model_cons.append('N')
if m.quantile > p:
model_cons.append('M')
if s.quantile > p:
model_cons.append('S')
Consistency_Results.append(model_cons)
ref_name = 'All models'
names = [i.sim_name for i in LL]
    ## Order results in terms of ~average performance for visualization purposes
order_val = norm(ll_score) + norm(cl_score) + norm(Brier_bin) + norm(Brier)
order = np.flip(np.argsort(order_val))
ll_score = ll_score[order]
cl_score = cl_score[order]
Brier = Brier[order]
Brier_bin = Brier_bin[order]
model_names = [names[i] for i in order]
Consistency_Results = [Consistency_Results[i] for i in order]