Commit dc1c2454 authored by Maximilian Schanner's avatar Maximilian Schanner
Browse files

Reworked documentation, added pole bugfix, reworked sampling, minors.

parent a5abbbfb
......@@ -64,7 +64,7 @@ def xyz2rpt(x, y, z):
def intensity(posterior, mu_dips, cov_dips, r_ref, r_at=REARTH,
n_par_samps=1000, n_g_samps=100, n_points=1001,
n_samps=10000, n_points=1001,
ret_samps=False):
""" Calculate the pdf of the dipole intensity, by sampling and
kde-smoothing.
......@@ -79,15 +79,13 @@ def intensity(posterior, mu_dips, cov_dips, r_ref, r_at=REARTH,
The corresponding covariance matrices
r_ref : float
The reference radius for the input dipole coefficients
n_par_samps: int, default 100
The number of parameter sets to be sampled
n_g_samps : int, default 100
The number of samples per set of parameters
ret_samps : bool, default False
n_samps: int (optional, default is 10000)
The number of samples
ret_samps : bool (optional, default is False)
Whether to also return the raw samples
n_points : int, default 1001
n_points : int (optional, default is 1001)
Number of evaluation points for the smoothed pdf
r_at : float, default pyfield.REARTH
r_at : float (optional, default is REARTH)
The reference radius for the output intensity
Returns
......@@ -103,7 +101,7 @@ def intensity(posterior, mu_dips, cov_dips, r_ref, r_at=REARTH,
gm_weights = posterior / posterior.sum()
ens_dip = sample_GM(gm_weights.flatten(), mu_dips, cov_dips,
n_par_samps=n_par_samps, n_g_samps=n_g_samps)
n_samps=n_samps)
# transform to spherical coordinates, keep only intensity
# the correspondence is g_1_0=z, g_1_1=x, g_1_-1=y
ens_inte, _, _ = xyz2rpt(ens_dip[1], ens_dip[2], ens_dip[0])
......@@ -117,7 +115,7 @@ def intensity(posterior, mu_dips, cov_dips, r_ref, r_at=REARTH,
return points, pdf
def location(posterior, mu_dips, cov_dips, n_points=10000,
def location(posterior, mu_dips, cov_dips, n_points=1000,
bounds=[[0., np.deg2rad(30)], [0, 2*np.pi]]):
""" From the posterior and corresponding dipole coefficients, calculate
the pdf of the dipole location
......@@ -130,10 +128,11 @@ def location(posterior, mu_dips, cov_dips, n_points=10000,
The dipole coefficients from the integration
cov_coeffs : array-like of shape (N, 3, 3)
The corresponding covariance matrices
n_points : int, default 1000
n_points : int (optional, default is 1000)
The (approximate) number of gridpoints to evaluate the location on
bounds : 2x2 array-like, default [[0., np.deg2rad(30)],
[np.deg2rad(0), np.deg2rad(350)]]
bounds : 2x2 array-like (optional,
default is [[0., np.deg2rad(30)],
[0., 2*np.pi]])
The interval over which the density shall be plotted, in rad. This is
useful, since in most cases the dipole will be close to the geographic
north pole, thus the default interval covers only latitudes down to
......@@ -183,8 +182,7 @@ def location(posterior, mu_dips, cov_dips, n_points=10000,
return lat, lon, dens
def sample_GM(weights, mus, covs,
n_par_samps=1000, n_g_samps=100):
def sample_GM(weights, mus, covs, n_samps=10000):
""" Sample from a Gaussian mixture distribution
Parameters
......@@ -195,43 +193,28 @@ def sample_GM(weights, mus, covs,
The means of the mixture components
covs : array-like
The covariance-matrices of the mixture components
n_par_samps: int, default 1000
The number of mixture-component samples
n_g_samps : int, default 100
The number of samples per mixture component
n_samps: int (optional, default is 10000)
The number of samples
Returns
-------
ens : numpy.ndarray
n_par_samps*n_g_samps samples from the Gaussian mixture
n_samps samples from the Gaussian mixture
"""
if not np.isclose(sum(weights), 1.):
raise ValueError("Sum of the weights has to be one!")
if len(np.asarray(mus).shape) == 1:
dim = len(mus)
else:
dim = len(mus[0])
n = len(weights)
# sample indices, corresponding to parameter pairs
# weights are given by the posterior
par_samps = np.random.choice(n, size=n_par_samps, replace=True,
par_samps = np.random.choice(n, size=n_samps, replace=True,
p=weights)
# allocate ensemble array
ens = np.empty((dim, n_g_samps*n_par_samps))
for it in range(n_par_samps):
# get the index
ind_it = par_samps[it]
# sample from the corresponding Gaussian
samp = np.random.multivariate_normal(mean=mus[ind_it],
cov=covs[ind_it],
size=n_g_samps)
# and write the samples to the ensemble
ens[:, it*n_g_samps:(it+1)*n_g_samps] = samp.T
# TODO: Is it possible to boost this?
ens = [np.random.multivariate_normal(mean=mus[it], cov=covs[it])
for it in par_samps]
return ens
return np.asarray(ens).T
def rescale_coeff_results(r_from, r_to, mu_coeffs, cov_coeffs):
......@@ -278,7 +261,7 @@ def coeffs(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH):
The corresponding covariance matrices
r_ref : float
The reference radius for the input coefficients
r_at : float, default pyfield.REARTH
r_at : float (optional, default is REARTH)
The reference radius for the output coefficients
Returns
......@@ -322,7 +305,7 @@ def coeffs(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH):
def spectrum(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH,
n_par_samps=1000, n_g_samps=1000):
n_samps=100000):
""" Calculate the power spectrum resulting from the mixture and find the
68% confidence-interval by sampling
......@@ -336,12 +319,10 @@ def spectrum(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH,
The corresponding covariance matrices
r_ref : float
The reference radius for the input coefficients
r_at : float, default pyfield.REARTH
r_at : float (optional, default is REARTH)
The reference radius for the output coefficients
n_par_samps: int, default 1000
The number of parameter sets to be sampled
n_g_samps : int, default 100
The number of samples per set of parameters
n_samps: int (optional, default is 100000)
The number of samples for the percentiles
Returns
-------
......@@ -361,7 +342,7 @@ def spectrum(posterior, mu_coeffs, cov_coeffs, r_ref, r_at=REARTH,
gm_weights = posterior / posterior.sum()
ens = sample_GM(gm_weights.flatten(), mu_coeffs, cov_coeffs,
n_par_samps=n_par_samps, n_g_samps=n_g_samps)
n_samps=n_samps)
# 1st two moments of mixture
mean = np.sum(mu_coeffs * gm_weights.flatten()[:, None], axis=0)
......
......@@ -2,7 +2,7 @@
This module is part of the CORBASS algorithm. It provides a routine for the
first step, the exploration of the parameter space.
Copyright (C) 2019 Maximilian Schanner, GFZ Potsdam
Copyright (C) 2019 Maximilian Schanner, GFZ Potsdam
Copyright (C) 2019 Stefan Mauerberger, University of Potsdam
Cite as:
......@@ -83,14 +83,14 @@ class ExplorationResult:
print('No results available, nothing to write!')
def explore(name, data, N, bounds, r_ref=3200., prefix=''):
def explore(name, data, N, bounds, r_ref=3200.):
""" Explore the parameterspace given by bounds on an NxNxN grid, for the
given data and save the output
Parameters
----------
fname : str
A name for the output
name : str
A name for the bin
data : Pandas.DataFrame
The data to be explored
N : int
......@@ -98,7 +98,7 @@ def explore(name, data, N, bounds, r_ref=3200., prefix=''):
bounds : 3x2 array-like
The boundaries for exploration, as min-max pairs for the non-dipole
variance, the error scaling and the residual
r_ref : float, default 3200.0
r_ref : float (optional, default is 3200.0)
The reference radius
Returns
......
......@@ -5,7 +5,7 @@
assisting routine for finding a region of high probability mass, given
output from the previous exploration step.
Copyright (C) 2019 Maximilian Schanner, GFZ Potsdam
Copyright (C) 2019 Maximilian Schanner, GFZ Potsdam
Copyright (C) 2019 Stefan Mauerberger, University of Potsdam
Cite as:
......@@ -184,7 +184,7 @@ def get_integ_bounds(expl_bounds, posterior, moments_out=False):
posterior : NxNxN array-like
The posterior, resulting from exploration of some data over the space
spanned by expl_bounds
moments_out : bool, default False
moments_out : bool (optional, default is False)
Whether to output the empirical means and variances
Returns
......@@ -244,22 +244,22 @@ def integrate(name, data, N, bounds, r_ref=3200.,
Parameters
----------
data_grouped : Pandas.DataFrameGroupBy
The binned data
name : str
A name for the bin
data : Pandas.DataFrame
The data for the bin
N : int
The number of points per dimension
bounds : 3x2 array-like
The boundaries for integration, as min-max pairs for the non-dipole
variance, the error scaling and the residual
r_ref : float, default 3200.0
r_ref : float (optional, default is 3200.0)
The reference radius
name : str, default 'default'
A name for the output
N_desi : int, default 100
N_desi : int (optional, default is 100)
The approximate number of design points for the field
l_max : int, default 15
l_max : int (optional, default is 15)
The maximum Gauss coefficient degree
scale : float, default 0
scale : float (optional, default is 0)
A scaling for the posterior, which may be necessary if the values are
small, so that np.exp(posterior) evaluates to 0. A good choice is the
maximum of the posterior resulting from the grid exploration.
......@@ -394,7 +394,7 @@ def integrate(name, data, N, bounds, r_ref=3200.,
var_z_cmb -= mu_z_cmb**2
var_f_cmb -= mu_f_cmb**2
# again scale the log-posterior...
# scale the log-posterior...
posterior -= posterior.max()
# ...take the exponential...
posterior = np.exp(posterior)
......@@ -433,16 +433,9 @@ def integrate(name, data, N, bounds, r_ref=3200.,
if __name__ == '__main__':
from utils import load
from corbass.utils import load
from corbass.exploration import ExplorationResult
try:
par_file = sys.argv[1]
except IndexError:
print(f'Run "{sys.argv[0]} <path/to/par_file.py>", specifying the path'
f' to a parameter file!')
sys.exit()
pars = load(sys.argv)
for bin_name, data in pars.data:
......
This diff is collapsed.
......@@ -42,7 +42,8 @@ path = Path(__file__).parent
def read_data(fname):
""" Read file from the GEOMAGIA database and return a pandas.Dataframe """
""" Read a file from the GEOMAGIA database and return a pandas.Dataframe
"""
# Missing values are indicated by either one of
na = ('9999', '999', '999.9', 'nan', '-999')
# Read data as DataFrame
......@@ -151,8 +152,8 @@ def newer(file_a, file_b):
def load(argv):
""" Given the sys.argv of a program, this helper function tries to load a
parameter file, given as argv[1] and checks of a second argument (argv[2]),
specifying an epoch. """
parameter file, given as argv[1] and checks for a second argument
(argv[2]), specifying an epoch. """
# handle the case that load is used from a notebook
if isinstance(argv, str):
argv = [None, argv]
......@@ -262,4 +263,3 @@ def dif2nez(d, i, f):
return f*np.cos(np.deg2rad(i))*np.cos(np.deg2rad(d)), \
f*np.cos(np.deg2rad(i))*np.sin(np.deg2rad(d)), \
f*np.sin(np.deg2rad(i))
......@@ -2,7 +2,7 @@
This script runs the CORBASS algorithm for a parameter file that is
passed via the command line.
Copyright (C) 2019 Maximilian Schanner, GFZ Potsdam
Copyright (C) 2019 Maximilian Schanner, GFZ Potsdam
Copyright (C) 2019 Stefan Mauerberger, University of Potsdam
Cite as:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment