Commit 106e65a2 authored by Pablo Iturrieta Rebolledo

Fixed N-test score calc

Fixed PGS plotting
parent 19776d08
@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.8 (venv)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
......
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (venv)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (venv)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
@@ -2,7 +2,7 @@ import matplotlib.ticker as tick
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from csep.core import poisson_evaluations as poisson
from csep.utils import plots
from csep.utils import plots
import numpy as np
import copy
import scipy.stats
......
@@ -8,6 +8,7 @@ import pickle
import copy
import os
from codes import get_catalogs, get_models, filepaths
import scipy
def process_forecasts_batch(Model_batch, catalog, save_obj=False, load_obj=False):
@@ -105,7 +106,8 @@ def plot_merged_batchs(batch, savepath=False):
def get_score(batch):
Scores = []
for R in batch:
score = R.test_distribution[1]/R.observed_statistic
# score = R.test_distribution[1]/R.observed_statistic
score = 2*(1 - scipy.stats.poisson.cdf(R.observed_statistic, R.test_distribution[1]))
Scores.append([R.sim_name, score])
print(R.sim_name, score)
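For reference, a minimal standalone sketch of the corrected score above, with illustrative numbers standing in for R.observed_statistic (observed event count) and R.test_distribution[1] (forecast expectation):

import scipy.stats

observed_count = 120       # illustrative stand-in for R.observed_statistic
expected_count = 100.0     # illustrative stand-in for R.test_distribution[1]
# twice the upper-tail Poisson probability of the observation given the forecast;
# the score shrinks towards 0 as the observation moves far above the expectation
score = 2 * (1 - scipy.stats.poisson.cdf(observed_count, expected_count))
print(score)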
@@ -128,11 +130,12 @@ def run(used_saved=False):
plot_results(Results5, years=5, savepath=filepaths.csep_figs)
plot_results(Results10, years=10, savepath=filepaths.csep_figs)
plot_merged_batchs(merge_results_batchs(Results10, Results5), savepath=filepaths.csep_figs)
get_score(Results5)
if __name__ == '__main__':
used_saved = False
run()
a =run(used_saved=True)
import matplotlib.ticker as tick
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from csep.core import poisson_evaluations as poisson
from csep.utils import plots
import csep
import pickle
import os
import numpy as np
import scipy.stats
from codes import get_catalogs, get_models, filepaths
def pgs(Forecasts, Catalog, ref='avg', bin='spatial_magnitude'):
def pgs(Forecasts, Catalog, ref='avg', bin='spatial_magnitude', save_obj=False, load_obj=False):
if load_obj:
try:
with open(load_obj, 'rb') as obj:
results = pickle.load(obj)
return results
except:
raise Exception('Results object not found')
k = len(Forecasts)
if bin=='spatial_magnitude':
if bin == 'spatial_magnitude':
catalog_counts = Catalog.spatial_magnitude_counts()
elif bin=='spatial':
elif bin == 'spatial':
catalog_counts = Catalog.spatial_counts().reshape(-1,1)
probabilities_forecasts = []
for f_i in Forecasts:
data = np.abs(f_i[0].data)
if bin=='spatial':
if bin == 'spatial':
data = np.sum(data, axis=1).reshape(-1,1 )
print(catalog_counts.shape, data.shape)
prob_i = scipy.stats.poisson.pmf(catalog_counts, data)
probabilities_forecasts.append(prob_i)
if ref == 'avg':
if ref == 'avg': ## Select average as head-to-head competitor
total_bet = np.sum(probabilities_forecasts, axis=0)/k
payout = np.zeros((k, catalog_counts.shape[0], catalog_counts.shape[1] ))
for i in range(payout.shape[0]):
payout[i] = (-1 + probabilities_forecasts[i]/total_bet)
elif isinstance(ref, int):
elif isinstance(ref, int): ## Choose model to do head-to-head betting
ref = probabilities_forecasts[ref]
payout = np.zeros((k, catalog_counts.shape[0], catalog_counts.shape[1]))
for i in range(payout.shape[0]):
payout[i] = -1 + 2*probabilities_forecasts[i] / (probabilities_forecasts[i]+ref)
net = []
net_eqk = []
ind = np.nonzero(catalog_counts)
for i in payout:
net.append(np.sum(i))
net_eqk.append(np.sum(i[ind[0], ind[1]]))
results = []
print('Model', 'Net Return', 'Net Return p/EQ')
for i,j,k in zip(Forecasts, net, net_eqk):
print(i[0].name, j, k)
print('Model', 'Net Return', 'Net Return in EQ bins')
for model, i in enumerate(payout):
net = np.sum(i)
net_eqk = np.sum(i[ind[0], ind[1]])
return [(i,j) for i,j in zip(net, net_eqk)]
result_i = csep.models.EvaluationResult(
name='Parimutuel Gambling Score',
test_distribution=(net, net_eqk),
sim_name=Forecasts[model][0].name)
results.append(result_i)
print(result_i.sim_name,net, net_eqk)
if save_obj:
with open(save_obj, 'wb') as obj:
pickle.dump(results, obj)
return results
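For orientation, a small sketch of the two payout rules implemented in pgs(), applied to made-up per-bin probabilities for k = 3 forecasts (the numbers mirror the worked example in the parimutuel_experiment docstring further down, not real results):

import numpy as np

probs = np.array([0.20, 0.75, 0.95])        # Poisson probabilities of the observation, one per forecast
avg_bet = probs.sum() / len(probs)          # ref='avg': bet against the average forecast
print(-1 + probs / avg_bet)                 # ~ [-0.68, 0.18, 0.50]
ref_prob = probs[2]                         # ref=<int>: head-to-head against one chosen model
print(-1 + 2 * probs / (probs + ref_prob))  # the reference model itself nets 0 under this rule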
def plot_pgs(Results, years, ref, savepath=False):
def plot_pgs(Results, Forecasts, years, ref, savepath=False): #todo fix reversing of lists
Results.reverse()
Forecasts.reverse()
fig, ax = plt.subplots(figsize=(7,6))
ax.set_title('Parimutuel Gambling Score\n%i years\n%s' % (years, ref), fontsize=16)
ax.axvline(0, linestyle='-', color='black')
for index, res in enumerate(Results):
ax.plot(res[0], index, color='blue', marker='o')
ax.plot(res[1], index, color='green', marker='*')
xlims = []
for index, res in enumerate(Results[::-1]):
ax.plot(res.test_distribution[0], index, color='blue', marker='o')
ax.plot(res.test_distribution[1], index, color='green', marker='*')
xlims.extend([res.test_distribution[0], res.test_distribution[1]])
ax.set_ylim([-0.5, len(Results) - 0.5])
ax.set_yticks(np.arange(len(Results)))
ax.set_yticklabels([i[0].name for i in Forecasts], fontsize=12)
ax.set_yticklabels([i.sim_name for i in Results], fontsize=12)
yTickPos = ax.get_yticks()
if len(yTickPos) >= 2:
ax.barh(yTickPos, np.array([99999] * len(yTickPos)), left=-10000,
height=(yTickPos[1] - yTickPos[0]), color=['w', 'gray'], alpha=0.2, zorder=0)
ax.set_xlim([np.array(Results).min()-1, np.array(Results).max()+1])
ax.set_xlim([min(xlims)-1, max(xlims)+1])
legend_elements = [Line2D([0], [0], marker='o', lw=0,label='Net return',
color='blue', markersize=5),
Line2D([0], [0], marker='*', color='green', lw=0 , markersize=5,
label='Net return in EQs bins')]
label='Net return in EQs bins')]
ax.legend(handles=legend_elements, loc='lower right')
fig.tight_layout()
if savepath:
fig.savefig(os.path.join(savepath, 'pgs_%iyr_%s.png' % (years, ref)) , dpi=300 )
fig.show()
def plot_pgs_double(Results1, Results2, Forecasts, ref, savepath=False): #todo fix reversing of lists
Results1.reverse()
Results2.reverse()
Forecasts.reverse()
def plot_merged_batchs(Results1, Results2, ref, savepath=False):
fig, ax = plt.subplots(figsize=(7,8))
ax.set_title('Parimutuel Gambling Score\n%s' % ref, fontsize=16)
ax.axvline(0, linestyle='-', color='black')
for index, res in enumerate(zip(Results1, Results2)):
ax.plot(res[0][0], 2*index, color='blue', marker='o')
ax.plot(res[1][0], 2*index+1, color='blue', marker='o')
ax.plot(res[0][1], 2 * index, color='green', marker='*')
ax.plot(res[1][1], 2 * index + 1, color='green', marker='*')
xlims = []
labels = []
for index, res in enumerate(zip(Results1[::-1], Results2[::-1])):
ax.plot(res[0].test_distribution[0], 2*index, color='blue', marker='o')
ax.plot(res[0].test_distribution[1], 2*index, color='green', marker='*')
ax.plot(res[1].test_distribution[0], 2*index+1, color='blue', marker='o')
ax.plot(res[1].test_distribution[1], 2*index+1, color='green', marker='*')
xlims.extend([res[0].test_distribution[0], res[0].test_distribution[1],
res[1].test_distribution[0], res[1].test_distribution[1]])
assert res[0].sim_name == res[1].sim_name, 'different models'
labels.append('10yr')
labels.append(res[0].sim_name + ' - 5yr')
ax.set_ylim([-0.5, 2*len(Results1) - 0.5])
ax.set_xlim([min(xlims)-1, max(xlims)+1])
ax.set_yticks(np.arange(2*len(Results1)))
labels = []
for i in Forecasts:
labels.append('10yr')
labels.append(i[0].name + ' - 5yr')
ax.set_yticklabels(labels, fontsize=12)
yTickPos, _ = plt.yticks()
yTickPos = yTickPos[:-1]
ax.barh(yTickPos[::2] + 0.5, 1000*np.array([45] * len(yTickPos[::2])), left=-1000,
height=2 * (yTickPos[1] - yTickPos[0]), color=['w', 'gray'], alpha=0.3)
ax.set_xlim([-15, 25])
legend_elements = [Line2D([0], [0], marker='o', lw=0,label='Net return',
color='blue', markersize=5),
Line2D([0], [0], marker='*', color='green', lw=0 , markersize=5,
@@ -115,25 +128,24 @@ def plot_pgs_double(Results1, Results2, Forecasts, ref, savepath=False): #todo
fig.savefig(os.path.join(savepath, 'pgs_both_%s.png' % ref) , dpi=300 )
fig.show()
if __name__ == '__main__':
def run():
models5 = get_models.get_models_5yr()
catalog5 = get_catalogs.filter_5yr_08_2009(get_catalogs.taroni2008())
models10 = get_models.get_models_10yr()
catalog5 = get_catalogs.filter_5yr_08_2009(get_catalogs.taroni2008())
catalog10 = get_catalogs.filter_10yr_01_2010(get_catalogs.buolletino())
results5 = pgs(models5, catalog5, bin='spatial_magnitude', ref=11)
plot_pgs(results5, 5, 'MPS04after', savepath=filepaths.csep_figs)
results10 = pgs(models10, catalog10, bin='spatial_magnitude', ref=11)
plot_pgs(results10, 10, 'MPS04after', savepath=filepaths.csep_figs)
plot_merged_batchs(results10, results5, 'MPS04after', savepath=filepaths.csep_figs)
# results5 = pgs(models5, catalog5, bin='spatial_magnitude', ref=11)
# plot_pgs(results5, models5, 5, 'MPS04after', savepath=filepaths.csep_figs)
results5 = pgs(models5, catalog5, bin='spatial_magnitude', ref='avg')
plot_pgs(results5, models5, 5, 'Average', savepath=filepaths.csep_figs)
results5 = pgs(models5, catalog5, bin='spatial_magnitude', ref='avg', save_obj=filepaths.test_path['PGS_5'])
plot_pgs(results5, 5, 'Average', savepath=filepaths.csep_figs)
results10 = pgs(models10, catalog10, bin='spatial_magnitude', ref='avg', save_obj=filepaths.test_path['PGS_10'])
plot_pgs(results10, 10, 'Average', savepath=filepaths.csep_figs)
plot_merged_batchs(results10, results5, 'Average', savepath=filepaths.csep_figs)
# results10 = pgs(models10, catalog10, bin='spatial_magnitude', ref=11)
# plot_pgs(results10, models5, 10, 'MPS04after', savepath=filepaths.csep_figs)
results10 = pgs(models10, catalog10, bin='spatial_magnitude', ref='avg')
plot_pgs(results10, models5, 10, 'Average', savepath=filepaths.csep_figs)
plot_pgs_double(results5, results10, models5, 'Average')
\ No newline at end of file
if __name__ == '__main__':
run()
from codes import get_catalogs, get_models, filepaths
from csep_tests import n_test, m_test, s_test, l_test, cl_test
from csep_tests import n_test, m_test, s_test, l_test, cl_test, pgs
print(filepaths.test_path)
import likelihoodtests
import numpy as np
from scipy import io, stats
iFore_MinLon, iFore_MaxLon, iFore_MinLat, iFore_MaxLat, iFore_MinDepth, \
iFore_MaxDepth, iFore_MinMag, iFore_MaxMag, iFore_Rate, iFore_Mask, \
iFore_Observation, iFore_TrueProbability, iFore_FalseProbability, \
iFore_GamblingReturn = range(14)
iCat_Lon, iCat_Lat, iCat_DecYear, iCat_Month, iCat_Day, iCat_Mag, \
iCat_Depth = range(7)
def parimutuel_experiment(vForecastFiles, mCatalog, sResultFile,
sProbabilitiesFile, mForecastForBinning):
"""
Perform a parimutuel gambling analysis of the specified gridded rate
forecasts, pitting all forecasts against each other.
This analysis is analogous to all forecasts sitting around a roulette table
where only Red and Black are possible (i.e., no green). For each spin, each
gambler can abstain from betting or bet $1, split however she wishes between
Red and Black. The pot is then divided among all gamblers who placed a bet,
and the division is made according to the ratio of their correct bet to
total amount bet on the correct outcome. That is, if 3 gamblers bet $0.20,
$0.75, and $0.95 on the correct outcome, their payoffs are
[3*(0.2/(.2+.75+.95)) =]
$0.315789, $1.18421, and $1.50,
respectively, and the net returns are
-$0.684211, $0.18421, and $0.50.
To do this analysis, bin the observations and convert all forecast rates to
Poisson probabilities of the observation in each bin. Then, for each bin,
compute the payoff for each forecast. Finally, compute and save the net
returns for each forecast.
Also saved here: the net returns in bins where eqks occurred, the matrix of
forecast probabilities and the matrix of net returns per bin
"""
# determine the number of eqks in each bin (this assumes that all forecasts
# have the same format and ordering)
vEqksPerBin = likelihoodtests.eqks_per_bin(mCatalog, mForecastForBinning)
iBins = np.size(mForecastForBinning, 0)
iForecasts = np.size(vForecastFiles)
#% Now for each bin and for each forecast, compute the Poisson probability
#% of the observation given the forecast rate
mForecastProbabilities = np.zeros((iBins, iForecasts))
for iCnt in range(iForecasts):
sForecastFile = vForecastFiles[iCnt]
mForecast = io.loadmat(sForecastFile)['mModel']
vBinMaskingBits = mForecast[:, iFore_Mask] # Abstaining?
vRates = mForecast[:, iFore_Rate] * vBinMaskingBits
vProbabilities = stats.poisson.pmf(vEqksPerBin, vRates)
vProbabilities[np.isnan(vProbabilities)] = 0
# % Store the probabilities per bin
mForecastProbabilities[:, iCnt] = vProbabilities
#% All those who placed a bet split the pot in a way that is proportional to
#% the ratio between their correct bet and the total amount bet on the
#% correct outcome
vTotalBetPerBin = np.zeros(iBins)
for iCnt in range(iBins):
vTotalBetPerBin[iCnt] = np.count_nonzero(mForecastProbabilities[iCnt, :])
vTotalBetOnCorrectOutcomePerBin = np.sum(mForecastProbabilities[:, :], axis = 1)
mPayouts = np.zeros((iBins, iForecasts))
for iForecastCnt in range(iForecasts):
mPayouts[:, iForecastCnt] = mForecastProbabilities[:, iForecastCnt] / vTotalBetOnCorrectOutcomePerBin * vTotalBetPerBin
mNetReturns = np.zeros_like(mPayouts)
for iForecastCnt in range(iForecasts):
# subtract one if a bet was placed (i.e., probability is not zero),
# otherwise the net return is just the payout
mNetReturns[:, iForecastCnt] = mPayouts[:, iForecastCnt] - np.not_equal(mForecastProbabilities[:, iForecastCnt], 0).astype(int)
vTotalPayoutsPerForecast = np.sum(mPayouts, axis = 0)
vTotalBetsPerForecast = np.empty_like(vTotalPayoutsPerForecast)
for iCnt in range(iForecasts):
vTotalBetsPerForecast[iCnt] = np.count_nonzero(mForecastProbabilities[:, iCnt] > 0)
vNetReturnsPerForecast = vTotalPayoutsPerForecast - vTotalBetsPerForecast
# Now isolate the net returns where eqks occurred
vBinsWhereEqksOccurred = np.nonzero(vEqksPerBin)[0]
iBinsOfInterest = np.size(vBinsWhereEqksOccurred)
mForecastProbabilitiesOfInterest = mForecastProbabilities[vBinsWhereEqksOccurred, :]
vTotalBetPerBin = np.zeros(iBinsOfInterest)
for iCnt in range(iBinsOfInterest):
vTotalBetPerBin[iCnt] = np.count_nonzero(mForecastProbabilitiesOfInterest[iCnt, :])
vTotalBetOnCorrectOutcomePerBin = np.sum(mForecastProbabilitiesOfInterest[:, :], axis = 1)
mPayouts = np.zeros((iBinsOfInterest, iForecasts))
for iForecastCnt in range(iForecasts):
mPayouts[:, iForecastCnt] = mForecastProbabilitiesOfInterest[:, iForecastCnt] / vTotalBetOnCorrectOutcomePerBin * vTotalBetPerBin
vTotalPayoutsPerForecast = np.sum(mPayouts, axis = 0)
vTotalBetsPerForecast = np.empty_like(vTotalPayoutsPerForecast)
for iCnt in range(iForecasts):
vTotalBetsPerForecast[iCnt] = np.count_nonzero(mForecastProbabilitiesOfInterest[:, iCnt] > 0)
vNetReturnsPerForecastWhereEqksOccurred = vTotalPayoutsPerForecast - vTotalBetsPerForecast
np.savez(sResultFile, netReturns = vNetReturnsPerForecast,
netReturnWhereEqksOccurred = vNetReturnsPerForecastWhereEqksOccurred,
netReturnPerBin = mNetReturns)
np.save(sProbabilitiesFile, mForecastProbabilities)
def net_returns(vBets):
"""
Given a vector that represents a set of roundtable bets on the true outcome,
compute the net return (payout - bet) for each bettor. The pot is divided
according to the ratio of each bettor's bet to the total amount bet.
That is, if 3 gamblers bet $0.20, $0.75, and $0.95 on the true outcome,
they will receive [3*(0.2/(.2+.75+.95)) =]
$0.315789, $1.18421, and $1.50, respectively. We assume that each gambler
can only bet $0 or $1, so the net returns for this scenario are -$0.68,
$0.18, and $0.50, respectively
"""
fTotalBet = np.count_nonzero(vBets)
fTotalBetOnOutcome = np.sum(vBets)
vPayouts = vBets * fTotalBet / fTotalBetOnOutcome
vNetReturns = vPayouts - np.int8(vBets > 0)
return vNetReturns.ravel()
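A quick check of net_returns() against the numbers quoted in its docstring (the three bets are taken from that description, not from forecast data):

print(net_returns(np.array([0.20, 0.75, 0.95])))   # ~ [-0.684, 0.184, 0.5]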
from codes import filters
from numpy import *
from scipy import special
import numpy as np
iFore_MinLon, iFore_MaxLon, iFore_MinLat, iFore_MaxLat, iFore_MinDepth, \
iFore_MaxDepth, iFore_MinMag, iFore_MaxMag, iFore_Rate, iFore_Mask = \
range(10)
iCat_Lon, iCat_Lat, iCat_DecYear, iCat_Month, iCat_Day, iCat_Mag, iCat_Depth = \
range(7)
def loglikelihood(vExpectations, vObservations):
"""
Compute the log-likelihood of the observation in each bin given the
specified expectation vector
"""
np.log(special.factorial(0))
vLL = -vExpectations + vObservations * np.log(vExpectations) - \
np.log(special.factorial(vObservations))
vLL[np.isinf(vLL)] = 0
vLL[np.isnan(vLL)] = 0
return vLL
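As a sanity check on the formula above (illustrative arrays; scipy.stats is imported here only for the comparison), the result matches scipy's Poisson log-pmf for non-zero rates:

from scipy import stats

rates = np.array([0.5, 2.0, 10.0])
counts = np.array([0.0, 3.0, 8.0])
print(loglikelihood(rates, counts))          # elementwise log-likelihoods
print(stats.poisson.logpmf(counts, rates))   # same values for these strictly positive rates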
def eqks_per_bin(mCatalog, mForecast):
"""
Bin the specified catalog into the bins of the specified forecast and return
a vector that includes the number of eqks in each bin
"""
mFilteredCatalog = filters.catalog_filtered_to_unmasked_area(mForecast,
mCatalog)
# Get the number of earthquakes in catalog
iEqks = np.size(mFilteredCatalog, 0)
iBins = np.size(mForecast, 0)
vEqksPerBin = np.zeros(iBins)
for iCnt in range(iEqks):
fLon = mFilteredCatalog[iCnt, iCat_Lon]
fLat = mFilteredCatalog[iCnt, iCat_Lat]
fDepth = mFilteredCatalog[iCnt, iCat_Depth]
fMag = mFilteredCatalog[iCnt, iCat_Mag]
iIndexOfInterest = np.nonzero((mForecast[:, iFore_MinLon] <= fLon) * \
(mForecast[:, iFore_MaxLon] > fLon) * \
(mForecast[:, iFore_MinLat] <= fLat) * \
(mForecast[:, iFore_MaxLat] > fLat) * \
(mForecast[:, iFore_MinDepth] <= fDepth) * \
(mForecast[:, iFore_MaxDepth] > fDepth) * \
(mForecast[:, iFore_MinMag] <= fMag) * \
(mForecast[:, iFore_MaxMag] > fMag) * \
(mForecast[:, iFore_Mask] > 0))[0]
if np.size(iIndexOfInterest) == 0:
print("Lon, Lat, Depth, fMag")
print(str(fLon) + ", " + str(fLat) + ", " + str(fDepth) + ", " + \
str(fMag))
exit()
else:
vEqksPerBin[iIndexOfInterest] += 1
return vEqksPerBin
\ No newline at end of file
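A minimal illustration of the binning test in eqks_per_bin() on synthetic arrays (column order follows the iFore_*/iCat_* constants above; this assumes filters.catalog_filtered_to_unmasked_area passes the single event through unchanged):

# one cell: lon [10, 11), lat [40, 41), depth [0, 30), mag [4.95, 5.05), rate 1.0, mask 1
mForecast = np.array([[10., 11., 40., 41., 0., 30., 4.95, 5.05, 1.0, 1.]])
# one event: lon 10.5, lat 40.5, decimal year, month, day, mag 5.0, depth 10 km
mCatalog = np.array([[10.5, 40.5, 2009.5, 6., 15., 5.0, 10.0]])
print(eqks_per_bin(mCatalog, mForecast))     # -> [1.]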
import numpy as np
def boynton_colors(iNumberOfColors):
"""
Return an array with the specified number of colors, n, where the array is
made up of the first n so-called Boynton non-confusing colors. See Table 1:
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.65.2790&rep=rep1&type=pdf
"""
vBoyntonColors = np.array(['white', 'blue', 'red', 'green', 'black',
'yellow', 'magenta', 'pink', 'grey', 'brown',
'orange'])
iNumberOfBoyntonColors = np.size(vBoyntonColors, 0)
if (iNumberOfColors > iNumberOfBoyntonColors):
print("There are only " + str(iNumberOfBoyntonColors) + " Boyton " + \
"colors, try random_colors_without_replacement instead" )
vColors = vBoyntonColors[0:iNumberOfColors]
return vColors
\ No newline at end of file