Commit c0f5497f authored by Pablo Iturrieta Rebolledo's avatar Pablo Iturrieta Rebolledo
Browse files

Parallelized K func

parent f774ba3a
......@@ -22,4 +22,6 @@ Currently, this project is based on `PyCSEP` and the requirements found there in
pip3 install seaborn
```
At the time (5-11-20), the `PyCSEP` fork/branch used is based on https://github.com/pabloitu/csep2.git, branch `plots`. This repository should be cloned from there. Plot functions are pending within a pull request of the upstream PyCSEP repository.
\ No newline at end of file
At the time (5-11-20), the `PyCSEP` fork/branch used is based on https://github.com/pabloitu/csep2.git, branch `plots`. This repository should be cloned from there. Plot functions are pending within a pull request of the upstream PyCSEP repository.
Install R from source (4.1.0) and addd packages spatstats and spatentropy
\ No newline at end of file
......@@ -8,13 +8,13 @@ import numpy as np
from codes import get_catalogs
import csep
import itertools
from multiprocessing import Pool
from rpy2 import robjects
import rpy2.robjects.packages as rpackages
from rpy2.robjects import globalenv
import cartopy
import seaborn as sns
import time
def get_simulations(model, catalog, lons, lats):
......@@ -76,7 +76,7 @@ def lonlat2mercator(points):
Defined to system RDN2008 - Italy (epsg:6875)
"""
src_crs = cartopy.crs.Geodetic()
dest_crs = cartopy.crs.epsg(6875)
dest_crs = cartopy.crs.Mercator()
Points = dest_crs.transform_points(src_crs, points[:,0], points[:,1])
return np.floor(Points[:,:2])
......@@ -92,9 +92,72 @@ def pcf_spatstat(fv, method='c'):
pcf_results = pcf_func.rcall(pcf_args, globalenv)
return pcf_results
def K_r_parallel(data):
points = data[0]
model = data[1]
normpower = 2
plot = False
r = np.linspace(0,800,100)
## Import R spatstat modules
spat_geom = rpackages.importr("spatstat.geom")
spat_core = rpackages.importr("spatstat.core")
# Import model to R
region = model.region
# Get the region properties
midpoints = region.midpoints()
lon_r = np.sort(np.unique(midpoints[:, 0]))
lat_r = np.sort(np.unique(midpoints[:, 1]))
X_r = lonlat2mercator(np.stack([lon_r, np.repeat(min(lat_r), lon_r.shape[0])]).T)[:,0]/1000
Y_r = lonlat2mercator(np.stack([np.repeat(min(lon_r), lat_r.shape[0]), lat_r]).T)[:,1]/1000
Xregion_array = robjects.FloatVector(np.sort(np.unique(X_r)))
Yregion_array = robjects.FloatVector(np.sort(np.unique(Y_r)))
# Get the model rates
rates = model.spatial_counts(cartesian=True)
rates_array = robjects.FloatVector(rates.T.ravel())
rates_mat = robjects.r['matrix'](rates_array, nrow=rates.shape[0])
image = spat_geom.im(rates_mat, Xregion_array, Yregion_array)
# Get the polygon window of the forecast
italy_region = fiona.open(paths.italy_region)
polygon = np.array(italy_region[0]['geometry']['coordinates'][0])
polygon = lonlat2mercator(polygon)/1000.
poly_array = robjects.FloatVector(np.flipud(polygon[:-1, :]).T.ravel())
poly_mat = robjects.r['matrix'](poly_array, nrow=polygon.shape[0] - 1)
poly_array_xrange = robjects.FloatVector([np.min(polygon[:, 0]), np.max(polygon[:, 0])])
poly_array_yrange = robjects.FloatVector([np.min(polygon[:, 1]), np.max(polygon[:, 1])])
window = spat_geom.owin(xrange=poly_array_xrange, yrange=poly_array_yrange, poly=poly_mat)
# Convert the catalog into R point process model
Points = lonlat2mercator(points)/1000
point_array_x = robjects.FloatVector(Points[:, 0])
point_array_y = robjects.FloatVector(Points[:, 1])
PPP_R = spat_geom.ppp(point_array_x, point_array_y, window)
# print(min(Points[:, 1]), max(Points[:, 1]))
# Get arguments: r-vector user-defined or automatic
if r is None:
args = (('X', PPP_R), ('lambda', image), ('correction', 'best'),
('normpower', normpower))
else:
args = (('X', PPP_R), ('lambda', image), ('correction', 'best'),
('normpower', normpower), ('r', robjects.FloatVector(r)))
# Get results
kinhom = spat_core.Kinhom
K = kinhom.rcall(args, globalenv)
if plot:
spat_geom.plot_im(image, 'asd', multiplot=True)
spat_geom.plot_ppp(PPP_R, 'asd', add=True)
return K
def K_r_spatstat(points, model, normpower=2,plot=False, r=None):
#todo get cartesian coordinates
## Import R spatstat modules
spat_geom = rpackages.importr("spatstat.geom")
......@@ -129,6 +192,7 @@ def K_r_spatstat(points, model, normpower=2,plot=False, r=None):
# Convert the catalog into R point process model
Points = lonlat2mercator(points)/1000
print(Points.shape)
point_array_x = robjects.FloatVector(Points[:, 0])
point_array_y = robjects.FloatVector(Points[:, 1])
......@@ -151,52 +215,63 @@ def K_r_spatstat(points, model, normpower=2,plot=False, r=None):
spat_geom.plot_im(image, 'asd', multiplot=True)
spat_geom.plot_ppp(PPP_R, 'asd', add=True)
# plt.plot(r_out, K, '--')
# plt.show()
return K
def plot_results(Results, alpha=0.01):
def plot_results(Results, alpha=0.05):
n = 0
for key, value in Results.items():
K_sims = value['K_sims']
# n+=1
# if n == 2:
# break
K_sims = value['K_sims'][2]
K_cat = value['K_cat']
r_cat = value['rk_cat']
r_sims = value['Rk_sims'][0]
K_avg = np.mean(K_sims, axis=0)
K_up = np.quantile(K_sims, 1-alpha/2, axis=0)
K_down = np.quantile(K_sims, alpha/2, axis=0)
# sns.set_theme(font='sans-serif')
sns.set_style("darkgrid", {"axes.facecolor": ".9", 'font.family':'Ubuntu'})
sns.lineplot(r_cat, K_cat, color='r', label='Observed catalog')
sns.lineplot(r_sims, K_avg, label='Sim. average')
# sns.lin
plt.fill_between(r_sims, K_down, K_up, color='gray', alpha=0.2, label=r'Sim. envelope ($\alpha=%.2f$)' % (1-2*alpha))
plt.title("Model - %s" % key, fontsize=16)
plt.xlabel(r"$r~~\mathrm{[km]}$")
plt.ylabel(r"$\hat{K}(r)$")
plt.legend(loc=2)
plt.savefig(paths.get_ripleyk_figpath('K', 10, key))
plt.show()
L_sims = value['L_sims']
L_cat = value['L_cat']
L_avg = np.mean(L_sims, axis=0)
L_up = np.quantile(L_sims, 1-alpha/2, axis=0)
L_down = np.quantile(L_sims, alpha/2, axis=0)
# K_avg = np.nanmean(K_sims, axis=0)
# K_up = np.nanquantile(K_sims, 1-alpha/2, axis=0)
# K_down = np.nanquantile(K_sims, alpha/2, axis=0)
# print(K_avg)
# # sns.set_theme(font='sans-serif')
sns.set_style("darkgrid", {"axes.facecolor": ".9", 'font.family':'Ubuntu'})
# for i in K_sims:
# sns.lineplot(r_sims, i, lw=0.1, color='black')
# sns.lineplot(r_cat, K_cat, color='r', label='Observed catalog')
# sns.lineplot(r_sims, K_avg, label='Sim. average')
#
# # sns.lin
# plt.fill_between(r_sims, K_down, K_up, color='gray', alpha=0.2, label=r'Sim. envelope ($\alpha=%.2f$)' % (1-2*alpha))
# plt.title("Model - %s" % key, fontsize=16)
# plt.xlabel(r"$r~~\mathrm{[km]}$")
# plt.ylabel(r"$\hat{K}(r)$")
# plt.legend(loc=2)
#
# plt.savefig(paths.get_ripleyk_figpath('K', 10, key))
# plt.show()
sns.lineplot(r_cat, L_cat, color='r', label='Observed catalog')
sns.lineplot(r_sims, L_avg, label='Sim. average')
plt.fill_between(r_sims, L_down, L_up, color='gray', alpha=0.2, label=r'Sim. envelope ($\alpha=%.2f$)' % (1-alpha))
plt.title("Model - %s" % key, fontsize=16)
plt.xlabel(r"$r~~\mathrm{[km]}$")
plt.ylabel(r"$L(r) = \sqrt{\frac{\hat{K}(r)}{\pi}}$")
plt.legend(loc=4)
plt.savefig(paths.get_ripleyk_figpath('L', 10, key))
plt.show()
# L_sims = value['L_sims']
# L_cat = value['L_cat']
# L_avg = np.mean(L_sims, axis=0)
# L_up = np.quantile(L_sims, 1-alpha/2, axis=0)
# L_down = np.quantile(L_sims, alpha/2, axis=0)
#
#
# for i in L_sims:
# sns.lineplot(x=r_sims, y=i, lw=0.05, color='blue', ls='--')
# sns.lineplot(x=r_cat, y=L_cat, color='r', label='Observed catalog')
# sns.lineplot(x=r_sims, y=L_avg, label='Sim. average')
# plt.fill_between(r_sims, L_down, L_up, color='gray', alpha=0.4, label=r'Sim. envelope ($\alpha=%.2f$)' % (1-alpha))
# plt.title("Model - %s" % key, fontsize=16)
# plt.xlabel(r"$r~~\mathrm{[km]}$")
# plt.ylabel(r"$L(r) = \sqrt{\frac{\hat{K}(r)}{\pi}}$")
# plt.legend(loc=4)
# plt.savefig(paths.get_ripleyk_figpath('L', 10, key))
# plt.show()
pcf_sims = value['PCF_sims']
pcf_cat = value['pcf_cat']
......@@ -206,16 +281,84 @@ def plot_results(Results, alpha=0.01):
pcf_up = np.quantile(pcf_sims, 1-alpha/2, axis=0)
pcf_down = np.quantile(pcf_sims, alpha/2, axis=0)
sns.lineplot(r_cat, pcf_cat, color='r', label='Observed catalog')
sns.lineplot(r_sims, pcf_avg, label='Sim. average')
sns.lineplot(x=r_cat, y=pcf_cat, color='r', label='Observed catalog')
sns.lineplot(x=r_sims, y=pcf_avg, label='Sim. average')
plt.fill_between(r_sims, pcf_down, pcf_up, color='gray', alpha=0.2, label=r'Sim. envelope ($\alpha=%.2f$)' % (1-alpha))
plt.title("Model - %s" % key, fontsize=16)
plt.ylim([-20, 20])
plt.xlabel(r"$r~~\mathrm{[km]}$")
plt.ylabel(r"$g(r) = \frac{1}{2\pi r}\,\frac{dK(r)}{dr}$")
plt.legend(loc=1)
plt.legend(loc=4)
plt.savefig(paths.get_ripleyk_figpath('pcf', 10, key))
plt.show()
def run_parallel():
models = get_models.ten_years()
catalog = get_catalogs.filter_10yr_01_2010(get_catalogs.bollettino())
Results = dict.fromkeys([i.name for i in models])
for model in models:
print(model.name)
points = np.stack((catalog.get_longitudes(), catalog.get_latitudes())).T
r = np.linspace(0,800,100)
K = K_r_spatstat(points, model, r=r)
rk_cat = K[0]
K_cat = K[2]
PCF = pcf_spatstat(K)
rpcf_cat = PCF[0]
pcf_cat = PCF[2]
lons = model.get_longitudes()
lats = model.get_latitudes()
nsim = 16
sim_catalogs = iter([(get_simulations(model, catalog, lons, lats)[0], model) for i in range(nsim)])
Rk_sims = []
K_sims = []
Rpcf_sims = []
PCF_sims = []
start = time.process_time()
p = Pool(8)
A = p.map(K_r_parallel, sim_catalogs)
p.close()
for K in A:
rk_i = list(K[0])
K_i = list(K[2])
PCF = pcf_spatstat(K)
rpcf_i = list(PCF[0])
pcf_i = list(PCF[2])
Rk_sims.append(rk_i)
K_sims.append(K_i)
Rpcf_sims.append(rpcf_i)
PCF_sims.append(pcf_i)
print(time.process_time() - start)
assert numpy.allclose(np.mean(Rk_sims, axis=0), np.array(Rk_sims[0]))
assert numpy.allclose(np.mean(Rpcf_sims, axis=0), np.array(Rpcf_sims[0]))
L = np.divide(np.sqrt(K_sims),np.sqrt(np.pi))
L_cat = np.sqrt(np.array(K_cat) / np.pi)
Results[model.name] = {'K_sims': K,
'Rk_sims': Rk_sims,
'K_cat': K_cat,
'rk_cat': rk_cat,
'L_sims': L,
'L_cat':L_cat,
'PCF_sims': PCF_sims,
'Rpcf_sims':Rpcf_sims,
'pcf_cat': pcf_cat,
'rpcf_cat': rpcf_cat}
with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'wb') as file_:
pickle.dump(Results, file_)
return Results
def run():
......@@ -238,14 +381,14 @@ def run():
lons = model.get_longitudes()
lats = model.get_latitudes()
nsim = 200
nsim = 16
sim_catalogs = iter([get_simulations(model, catalog, lons, lats) for i in range(nsim)])
Rk_sims = []
K_sims = []
Rpcf_sims = []
PCF_sims = []
start = time.process_time()
for cat, inds in sim_catalogs:
K = K_r_spatstat(cat, model, r=r)
......@@ -260,7 +403,7 @@ def run():
K_sims.append(K_i)
Rpcf_sims.append(rpcf_i)
PCF_sims.append(pcf_i)
print(time.process_time() - start)
assert numpy.allclose(np.mean(Rk_sims, axis=0), np.array(Rk_sims[0]))
assert numpy.allclose(np.mean(Rpcf_sims, axis=0), np.array(Rpcf_sims[0]))
......@@ -278,18 +421,17 @@ def run():
'pcf_cat': pcf_cat,
'rpcf_cat': rpcf_cat}
with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'wb') as file_:
pickle.dump(Results, file_)
# with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'wb') as file_:
# pickle.dump(Results, file_)
return Results
if __name__ == "__main__":
Results = run()
with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'rb') as file_:
Results = pickle.load(file_)
Results = run_parallel()
#
# with open(paths.get_kripley_result_path('KRipley_bolletino', 10), 'rb') as file_:
# Results = pickle.load(file_)
plot_results(Results)
# plot_results(Results)
fiona
rpy2
seaborn
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment