Commit f774ba3a authored by Pablo Iturrieta Rebolledo's avatar Pablo Iturrieta Rebolledo
Browse files

Modified Ripley's K function to calculate distances using a Mercator projection

parent 681f50db
from codes import paths
from codes import get_models
import matplotlib.pyplot as plt
import pickle
import numpy as np

# One-off generation of the test fixture (kept for reference):
# a = get_models.ten_years()
# with open('testmodel', 'wb') as file_:
#     pickle.dump(a[1], file_)

# Load the previously pickled test forecast model.
with open('testmodel', 'rb') as file_:
    forecast = pickle.load(file_)

# Log10 of the gridded spatial rates; divide-by-zero warnings from empty
# cells are silenced and the resulting -inf values zeroed out afterwards.
with np.errstate(divide='ignore'):
    log_rates = np.log10(forecast.spatial_counts(cartesian=True))
log_rates = np.nan_to_num(log_rates)

# Rate map (flipped so north is up in image coordinates).
plt.imshow(np.flipud(log_rates), cmap='jet', vmin=-7, vmax=0)
plt.show()

# Real part of the 2-D FFT of the log-rate image.
spectrum = np.real(np.fft.fft2(log_rates))
plt.imshow(spectrum, cmap='jet', vmin=-7, vmax=0)
plt.show()
......@@ -196,8 +196,8 @@ def get_entropy_measures(model, option='kmeans', log=True, plot=True, classbreak
spat = rpackages.importr("SpatEntropy")
Array = robjects.FloatVector(ndarray.ravel())
Mat = robjects.r['matrix'](Array, nrow=ndarray.shape[1])
Array = robjects.FloatVector(ndarray.T.ravel())
Mat = robjects.r['matrix'](Array, nrow=ndarray.shape[0])
shannon, rel_shannon = get_shannon(spat, Mat)
shannonZ, rel_shannonZ = get_shannonZ(spat, Mat)
......@@ -431,9 +431,9 @@ def plot_correlation(years, ref, classbreaks, method='kmeans', ll_type=None, n_e
if __name__ == '__main__':
classbreaks = [2, 4, 7, 10, 16, 25]
# run(classbreaks)
run(classbreaks)
# a = run_single()
# g = read_log('kmeans_True', '10')
g = read_log('kmeans_True', '10')
plt.close('all')
plot_correlation(10, 'avg', classbreaks=classbreaks, method='kmeans_False')
# plot_binnedforecast()
......
......@@ -13,6 +13,8 @@ from rpy2 import robjects
import rpy2.robjects.packages as rpackages
from rpy2.robjects import globalenv
import cartopy
import seaborn as sns
def get_simulations(model, catalog, lons, lats):
......@@ -78,7 +80,20 @@ def lonlat2mercator(points):
Points = dest_crs.transform_points(src_crs, points[:,0], points[:,1])
return np.floor(Points[:,:2])
def K_r_spatstat(points, model, normpower=1,plot=False, r=None):
def pcf_spatstat(fv, method='c'):
    """Compute the pair correlation function g(r) from a K-function object.

    Args:
        fv: An R 'fv' (function value) object, e.g. the output of
            spatstat.core::Kinhom.
        method: Smoothing/differentiation method forwarded to
            spatstat.core::pcf (one of 'a', 'b', 'c', 'd').

    Returns:
        The R 'fv' object returned by spatstat.core::pcf.
    """
    ## Import R spatstat module
    spat_core = rpackages.importr("spatstat.core")

    ## Call the pair correlation function.
    # BUG FIX: 'method' was previously hard-coded to 'c', silently ignoring
    # the caller's argument; it is now passed through.
    pcf_func = spat_core.pcf
    pcf_args = (('X', fv), ('method', method))
    pcf_results = pcf_func.rcall(pcf_args, globalenv)

    return pcf_results
def K_r_spatstat(points, model, normpower=2,plot=False, r=None):
#todo get cartesian coordinates
## Import R spatstat modules
......@@ -130,273 +145,151 @@ def K_r_spatstat(points, model, normpower=1,plot=False, r=None):
# Get results
kinhom = spat_core.Kinhom
K_results = kinhom.rcall(args, globalenv)
K = kinhom.rcall(args, globalenv)
K = list(K_results[2])
r_out = list(K_results[0])
if plot:
spat_geom.plot_im(image, 'asd', multiplot=True)
spat_geom.plot_ppp(PPP_R, 'asd', add=True)
# plt.plot(r_out, K, '--')
# plt.show()
return K, r_out
return K
def _plot_envelope(r_obs, y_obs, r_sim, y_sims, alpha, key, ylabel,
                   legend_loc, stat_name):
    """Plot one observed curve against the simulation mean and its envelope.

    Args:
        r_obs: Distances for the observed-catalog curve.
        y_obs: Observed-catalog statistic values.
        r_sim: Distances for the simulated curves.
        y_sims: 2-D array-like (n_sims, n_r) of simulated statistic values.
        alpha: Two-sided significance level; the quantile envelope spans
            [alpha/2, 1 - alpha/2], i.e. 1 - alpha probability mass.
        key: Model name, used in the figure title and save path.
        ylabel: LaTeX label for the y axis.
        legend_loc: Matplotlib legend location code.
        stat_name: Statistic tag ('K', 'L' or 'pcf') used for the save path.
    """
    y_avg = np.mean(y_sims, axis=0)
    y_up = np.quantile(y_sims, 1 - alpha / 2, axis=0)
    y_down = np.quantile(y_sims, alpha / 2, axis=0)
    sns.lineplot(r_obs, y_obs, color='r', label='Observed catalog')
    sns.lineplot(r_sim, y_avg, label='Sim. average')
    plt.fill_between(r_sim, y_down, y_up, color='gray', alpha=0.2,
                     label=r'Sim. envelope ($\alpha=%.2f$)' % (1 - alpha))
    plt.title("Model - %s" % key, fontsize=16)
    plt.xlabel(r"$r~~\mathrm{[km]}$")
    plt.ylabel(ylabel)
    plt.legend(loc=legend_loc)
    plt.savefig(paths.get_ripleyk_figpath(stat_name, 10, key))
    plt.show()


def plot_results(Results, alpha=0.01):
    """Plot K, L and pair-correlation envelopes for every model in Results.

    For each model the observed-catalog curve is drawn against the average
    of the simulated catalogs and a two-sided (1 - alpha) quantile envelope.
    Figures are saved via paths.get_ripleyk_figpath and shown interactively.

    Args:
        Results: Mapping of model name to a dict with keys 'K_sims',
            'K_cat', 'rk_cat', 'Rk_sims', 'L_sims', 'L_cat', 'PCF_sims',
            'pcf_cat', 'rpcf_cat' and 'Rpcf_sims' (as built by run()).
        alpha: Two-sided significance level for the simulation envelope.
    """
    for key, value in Results.items():
        sns.set_style("darkgrid",
                      {"axes.facecolor": ".9", 'font.family': 'Ubuntu'})

        # Ripley's K function.
        # BUG FIX: the K-panel label previously claimed alpha = 1 - 2*alpha
        # although the quantiles span 1 - alpha mass, exactly as in the L
        # and pcf panels; all three panels now report consistently.
        _plot_envelope(value['rk_cat'], value['K_cat'],
                       value['Rk_sims'][0], value['K_sims'],
                       alpha, key, r"$\hat{K}(r)$", 2, 'K')

        # Besag's L function (variance-stabilised transform of K).
        _plot_envelope(value['rk_cat'], value['L_cat'],
                       value['Rk_sims'][0], value['L_sims'],
                       alpha, key,
                       r"$L(r) = \sqrt{\frac{\hat{K}(r)}{\pi}}$", 4, 'L')

        # Pair correlation function g(r).
        _plot_envelope(value['rpcf_cat'], value['pcf_cat'],
                       value['Rpcf_sims'][0], value['PCF_sims'],
                       alpha, key,
                       r"$g(r) = \frac{1}{2\pi r}\,\frac{dK(r)}{dr}$",
                       1, 'pcf')
if __name__ == "__main__":
def run():
models = get_models.ten_years()
catalog = get_catalogs.filter_10yr_01_2010(get_catalogs.bollettino())
Results = dict.fromkeys([i.name for i in models])
K_total = dict.fromkeys([i.name for i in models])
for model in models[1:2]:
for model in models:
print(model.name)
points = np.stack((catalog.get_longitudes(), catalog.get_latitudes())).T
r = np.linspace(0,800,50)
K_cat, r_cat = K_r_spatstat(points, model, r=r)
r = np.linspace(0,800,100)
K = K_r_spatstat(points, model, r=r)
rk_cat = K[0]
K_cat = K[2]
PCF = pcf_spatstat(K)
rpcf_cat = PCF[0]
pcf_cat = PCF[2]
lons = model.get_longitudes()
lats = model.get_latitudes()
nsim = 100
nsim = 200
sim_catalogs = iter([get_simulations(model, catalog, lons, lats) for i in range(nsim)])
Rk_sims = []
K_sims = []
R_sims = []
Rpcf_sims = []
PCF_sims = []
n=0
for cat, inds in sim_catalogs:
K_i, r_i = K_r_spatstat(cat, model, r=r, plot=True)
K_sims.append(K_i)
R_sims.append(R_sims)
plt.plot(r_i, K_i, '.-')
plt.title('Model %s - All simulations' % model.name)
assert r_i == r_cat
plt.grid()
plt.savefig(paths.get_ripleyk_figpath('K_sim', 10, model.name))
plt.show()
K = K_r_spatstat(cat, model, r=r)
rk_i = list(K[0])
K_i = list(K[2])
K = K_sims
K_avg = np.mean(K, axis=0)
K_up = np.quantile(K, 0.99, axis=0)
K_down = np.quantile(K, 0.01, axis=0)
plt.plot(r_cat, K_cat, color='r')
plt.plot(r_cat, K_avg)
plt.fill_between(r_cat, K_down, K_up,
color='gray', alpha=0.2)
plt.title('Model %s - K function' % model.name)
plt.grid()
plt.savefig(paths.get_ripleyk_figpath('K', 10, model.name))
plt.show()
PCF = pcf_spatstat(K)
rpcf_i = list(PCF[0])
pcf_i = list(PCF[2])
Rk_sims.append(rk_i)
K_sims.append(K_i)
Rpcf_sims.append(rpcf_i)
PCF_sims.append(pcf_i)
assert numpy.allclose(np.mean(Rk_sims, axis=0), np.array(Rk_sims[0]))
assert numpy.allclose(np.mean(Rpcf_sims, axis=0), np.array(Rpcf_sims[0]))
L = np.divide(np.sqrt(K_sims),np.sqrt(np.pi))
L_avg = np.mean(L, axis=0)
L_cat = np.sqrt(np.array(K_cat) / np.pi)
L_up = np.quantile(L, 0.99, axis=0)
L_down = np.quantile(L, 0.01, axis=0)
plt.plot(r_cat, L_cat, color='r')
plt.plot(r_cat, L_avg)
plt.fill_between(r_cat, L_down, L_up,
color='gray', alpha=0.2)
plt.title('Model %s - L function' % model.name)
plt.grid()
plt.savefig(paths.get_ripleyk_figpath('L', 10, model.name))
plt.show()
K_total[model.name] = {'K_sims': K,
Results[model.name] = {'K_sims': K,
'Rk_sims': Rk_sims,
'K_cat': K_cat,
'K_avg': K_avg,
'K_down': K_down,
'rk_cat': rk_cat,
'L_sims': L,
'L_cat':L_cat,
'L_avg': L_avg,
'L_down': L_down,
'L_up': L_up}
with open(paths.get_kripley_result_path('Normpower2', 10), 'wb') as file_:
pickle.dump((model, catalog), file_)
with open(paths.get_kripley_result_path('Normpower2', 10), 'rb') as file_:
model, catalog = pickle.load(file_)
models = [model]
# if __name__ == '__main__':
#
# models = get_models.ten_years()
# catalog = get_catalogs.filter_10yr_01_2010(get_catalogs.bollettino())
# for model in models:
# model = models[10]
# with open('obj', 'wb') as file_:
# pickle.dump((model, catalog), file_)
# with open('obj', 'rb') as file_:
# model, catalog = pickle.load(file_)
# lons = model.get_longitudes()
# lats = model.get_latitudes()
# nsim = 100
# sim_catalogs = iter([get_simulations(model, catalog, lons, lats) for i in range(nsim)])
#
# K_sims = []
# R_disc = np.linspace(1, 1000, 50)
# for cat, inds in sim_catalogs:
# dist_sim = dists(cat)
# rates = model.spatial_counts()[inds]
# K_i = []
# for r in R_disc:
# b = K_r(dist_sim, rates, r)
# K_i.append(b)
# K_sims.append(K_i)
#
# K_avg = np.median(K_sims, axis=0)
# K_std = np.std(K_sims, axis=0)
# K_up = np.quantile(K_sims, 0.95, axis=0)
# K_down = np.quantile(K_sims, 0.15, axis=0)
#
# cat_obs = np.stack((catalog.get_longitudes(), catalog.get_latitudes())).T
# cat_grid_ind = model.region.get_index_of(catalog.get_longitudes(), catalog.get_latitudes())
# rates_cat = model.spatial_counts()[cat_grid_ind]
# dist_obs = dists(cat_obs)
# K_obs = []
# for r in R_disc:
# b = K_r(dist_obs, rates_cat, r)
# K_obs.append(b)
# print(cat_obs)
#
#
# L = np.divide(np.sqrt(K_sims),np.sqrt(np.pi))
# L_avg = np.mean(L, axis=0)
# L_obs = np.sqrt(np.array(K_obs)/np.pi)
# L_up = np.quantile(L, 0.85, axis=0)
# L_down = np.quantile(L, 0.15, axis=0)
# #
# #
# # plt.plot(R_disc, K_obs, color='r')
# # plt.plot(R_disc, K_avg)
# # plt.fill_between(R_disc, K_down, K_up,
# # color='gray', alpha=0.2)
# plt.plot(R_disc, L_obs, color='r')
# plt.plot(R_disc, L_avg)
# plt.fill_between(R_disc, L_down, L_up,
# color='gray', alpha=0.2)
# plt.show()
# your code here
# print(time.process_time() - start)
#
# def K_r(Events, rates, Dists, r):
# K_r = 0
# n = Events.shape[0]
#
# for i in range(n):
# for j in range(n):
# if j != i:
# mask = (Dists[i, :] < r) & (Dists[i, :] > 0)
# count = np.sum(mask)
# K_r += count / rates[i] / rates[j]
# return K_r / 899900
#################### CATALOG
# plt.close('all')
# cat = get_catalogs.filter_10yr_01_2010(get_catalogs.bollettino())
# region = cat.region
# counts= cat.spatial_counts()
# A = np.zeros(region.idx_map.shape)
# A[ A == 0] = np.nan
# for i in range(A.shape[0]):
# for j in range(A.shape[1]):
# if not np.isnan(region.idx_map[i,j]):
# A[i, j] = counts[int(region.idx_map[i,j])]
#
# cat_ind = cat.get_spatial_idx()
# Events = np.array([[i, j] for i,j in zip(cat.get_longitudes(), cat.get_latitudes())])
## Forecast
# models = get_models.ten_years()
# n_r = 25
# r_max = 1000
# range_models = range(19)
# savefig = True
# n_sim = 50
#
#
# R_s = (np.logspace(0, 3, num =n_r)-1).astype(int)
#
# T_x = []
# T = []
# for model_i in range_models:
# print('Analyzing model %s' % models[model_i].name)
# plt.figure(figsize=(6,4))
# plt.title('$K-$function\n %s' % models[model_i].name)
# forecast_data = models[model_i].spatial_counts()
# rates_eqk = forecast_data[cat_ind]
# Dists = dists(Events)
#
# K_cat = []
# for i in R_s:
# K_cat.append(K_r(Events, rates_eqk, Dists, i))
'PCF_sims': PCF_sims,
'Rpcf_sims':Rpcf_sims,
'pcf_cat': pcf_cat,
'rpcf_cat': rpcf_cat}
with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'wb') as file_:
pickle.dump(Results, file_)
#
# forecast_data = models[model_i].spatial_counts()
# observed_data = cat.spatial_counts()
# n_obs = numpy.sum(observed_data)
# n_fore = numpy.sum(forecast_data)
# scale = n_obs / n_fore
# expected_forecast_count = int(n_obs)
# num_events_to_simulate = expected_forecast_count
# sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)
#
# K_sim = []
# for i in range(n_sim):
#
# sim_fore = numpy.zeros(sampling_weights.shape)
# sim = csep.core.poisson_evaluations._simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore)
# ind_sim = np.where(sim != 0)[0] #todo fix 2 events per cell
# events_sim = np.array([models[model_i].get_longitudes()[ind_sim], models[model_i].get_latitudes()[ind_sim]]).T
# dists_sim = dists(events_sim)
# rates_sim = forecast_data[ind_sim]
# K = []
# for i in R_s:
# K.append(K_r(events_sim, rates_sim, dists_sim, i))
# K_sim.append(K)
#
#
# K_sim = np.array(K_sim)
# K_avg = np.median(K_sim, axis=0)
# K_std = np.std(K_sim)
return Results
# K_avg = np.median(K_sim, axis=0)
# K_up = np.quantile(K_sim, 0.85, axis=0)
# K_down = np.quantile(K_sim, 0.15, axis=0)
if __name__ == "__main__":
# plt.plot(R_s, K_cat, color='red')
# plt.plot(R_s, K_avg)
# plt.fill_between(R_s, K_down, K_up,
# color='gray', alpha=0.2)
Results = run()
with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'rb') as file_:
Results = pickle.load(file_)
# L = np.divide(np.sqrt(K_sim), K_std)
# L_avg = np.mean(L, axis=0)
# L_std = np.std(L, axis=0)
# L_up = np.quantile(L, 0.95, axis=0)
# # L_up = L_avg + L_std
#
# L_down = np.quantile(L, 0.05, axis=0)
# # L_down = L_avg - L_std
#
# L_cat = np.divide(np.sqrt(K_cat), K_std)
#
# plt.plot(R_s, L_cat, color='red')
# plt.plot(R_s, L_avg, '--', color='blue')
#
# plt.fill_between(R_s, L_down, L_up,
# color='gray', alpha=0.3)
# plt.xlabel('$r$ [km]')
# plt.ylabel(r'$L(r)= \frac{K(r)}{\pi}$')
# plt.legend([r'Observed $L(r)$', r'Simulated average $\overline{L(r)}$', '$0.95$ confidence intervals'], loc=4, fontsize=8)
# plt.tight_layout()
# plt.grid()
# if savefig:
# plt.savefig(paths.get_ripleyk_figpath('K', 10, models[model_i].name), dpi=300)
# plt.show()
plot_results(Results)
from rpy2 import robjects
import rpy2.robjects.packages as rpackages
import numpy as np
from codes import get_models, paths
import matplotlib.pyplot as plt
from itertools import chain
from sklearn.preprocessing import KBinsDiscretizer
from matplotlib.lines import Line2D
import csep
import cartopy
import os
import statsmodels
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
import fiona
from codes import get_catalogs
from rpy2.robjects import globalenv
def read_log(method, years):
    """Parse a spatial-entropy result log into a nested dictionary.

    Args:
        method: Classification-method tag used to locate the log file.
        years: Forecast-horizon tag used to locate the log file.

    Returns:
        dict mapping each model name (first CSV column) to a dict of
        {metric_name: float_value}, with metric names taken from the
        stripped header row.
    """
    filename_ = paths.get_spatent_result_path(method, years)
    results = {}
    with open(filename_, 'r') as file_:
        # Header row names the metric columns; strip surrounding whitespace.
        header = [col.strip() for col in file_.readline().split(',')]
        for row in file_:
            fields = row.split(',')
            # First column is the model identifier; the rest are metrics.
            results[fields[0]] = {name: float(value)
                                  for value, name in zip(fields[1:],
                                                         header[1:])}
    return results
def norm(array):
    """Min-max normalize *array* to the [0, 1] range."""
    lo = array.min()
    hi = array.max()
    return (array - lo) / (hi - lo)
def lonlat2mercator(points):
    """Project geodetic (lon, lat) points to RDN2008 / Italy (EPSG:6875).

    Args:
        points: (N, 2) array whose columns are longitude and latitude
            in degrees.

    Returns:
        (N, 2) array of projected coordinates, floored to whole units.
    """
    src_crs = cartopy.crs.Geodetic()
    dest_crs = cartopy.crs.epsg(6875)
    projected = dest_crs.transform_points(src_crs, points[:, 0], points[:, 1])
    # Drop the z column and truncate to an integer grid.
    return np.floor(projected[:, :2])
def classify_forecast(model, option=None, nbins=7, log=False, plot=False):
array = model.spatial_counts()
if log:
array = np.log10(array)
if isinstance(option, (np.ndarray, list)):
bins = option
if log==True:
option = "Logspace (%i, %i)" % (min(option), max(option))
else:
option = "Linspace (%.1e, %.1e)" % (min(option), max(option))
elif option == 'eq_quantile':
bins = []
for i in np.linspace(0, 1, nbins + 1)[1:-1]:
bins.append(np.nanquantile(array, i))
elif option == 'kmeans':
est = KBinsDiscretizer(n_bins=nbins, encode='ordinal', strategy='kmeans')
est.fit(array[np.isfinite(array)].reshape((-1, 1)))
bins = est.bin_edges_[0][1:-1]
elif option == 'total_eq_quantile':
forecasts = get_models.ten_years() #todo: just valid for tenyearsforecast
total_array = forecasts[0].spatial_counts()
for i in forecasts[1:]:
total_array = np.hstack((total_array, i.spatial_counts()))
total_array[total_array >= np.quantile(total_array, 0.99)] = np.quantile(total_array, 0.99)
total_array[total_array <= np.quantile(total_array, 0.01)] = np.quantile(total_array, 0.01)
if log:
total_array = np.log10(total_array)
total_array[~np.isfinite(total_array)] = np.nanmin(total_array[np.isfinite(total_array)])
bins = []
for i in np.linspace(0, 1, nbins + 1)[1:-1]:
bins.append(np.nanquantile(total_array, i))
elif option == 'total_kmeans':
forecasts = get_models.ten_years()
total_array = forecasts[0].spatial_counts()
for i in forecasts[1:]:
total_array = np.hstack((total_array, i.spatial_counts()))
total_array[total_array >= np.quantile(total_array, 0.99)] = np.quantile(total_array, 0.99)
total_array[total_array <= np.quantile(total_array, 0.01)] = np.quantile(total_array, 0.01)
if log:
total_array = np.log10(total_array)
total_array[~np.isfinite(total_array)] = np.nanmin(total_array[np.isfinite(total_array)])
est = KBinsDiscretizer(n_bins=7, encode='ordinal', strategy='kmeans')
est.fit(total_array[np.isfinite(total_array)].reshape((-1, 1)))
bins = est.bin_edges_[0][1:-1]
array[~np.isfinite(array)] = np.nanmin(array[np.isfinite(array)])
print('\trange', np.min(array), np.max(array))
print('\tquantile', np.quantile(array, 0.05),np.quantile(array, 0.95))
print('\tbins', ",".join(["%.2e" %i for i in bins]))
discretized_array = np.digitize(array, np.unique(bins))
# print('\tn_classes', len(np.unique(discretized_array)))
if plot:
print(option)
title = '%s Binning\n Model %s - %s years' % (str(option).capitalize(), model.name, model.years)
savepath = paths.get_sentropy_figpath("hist_" + str(option) +'_' + str(log), model.years, model.name)
fig = plt.figure(figsize=(8, 6))
plt.hist(array, bins=100, color='turquoise', alpha=0.8)
plt.title(title)
plt.axvline(np.min(array), color='red', linewidth=1, linestyle='--')
plt.axvline(np.max(array), color='red', linewidth=1, linestyle='--')
legend_elements = [Line2D([0], [0], color='blue', lw=5, label=r'Rates Distribution'),
Line2D([0], [0], color='red', lw=1, linestyle='--', label=r'99% mass intervals'),
Line2D([0], [0], color='green', lw=1, linestyle=':', label='Binning')]
for i in bins:
plt.axvline(i, linewidth=2, color='green', linestyle='--')
fig.get_axes()[0].legend(handles=legend_elements, loc='upper right')
plt.savefig(savepath, dpi=200, transparent=True)
plt.show()