Commit e352e9dc authored by Sebastian Heimann's avatar Sebastian Heimann

reorganized misfit combine functions

parent c28c015a
......@@ -11,9 +11,9 @@ inplace=1
[nosetests]
verbosity=2
detailed-errors=1
with-coverage=1
cover-erase=1
cover-package=kite
#with-coverage=0
#cover-erase=1
#cover-package=grond
[coverage:report]
exclude_lines =
......
......@@ -144,18 +144,18 @@ def get_mean_x(xs):
def get_mean_x_and_gm(problem, xs, misfits):
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
return num.mean(xs, axis=0), num.mean(gms)
def get_best_x(problem, xs, misfits):
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
ibest = num.argmin(gms)
return xs[ibest, :]
def get_best_x_and_gm(problem, xs, misfits):
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
ibest = num.argmin(gms)
return xs[ibest, :], gms[ibest]
......@@ -220,7 +220,7 @@ def forward(rundir_or_config_path, event_names):
problem, xs, misfits = load_problem_info_and_data(
rundir, subset='harvest')
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
ibest = num.argmin(gms)
xbest = xs[ibest, :]
......@@ -276,7 +276,6 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
optimizer_fn = op.join(rundir, 'optimizer.yaml')
optimizer = guts.load(filename=optimizer_fn)
nbootstrap = optimizer.nbootstrap
dumpdir = op.join(rundir, 'harvest')
if op.exists(dumpdir):
......@@ -289,14 +288,14 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
ibests_list = []
ibests = []
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
isort = num.argsort(gms)
ibests_list.append(isort[:nbest])
if weed != 3:
for ibootstrap in range(nbootstrap):
bms = problem.bootstrap_misfits(misfits, nbootstrap, ibootstrap)
for ibootstrap in range(optimizer.nbootstrap):
bms = optimizer.bootstrap_misfits(problem, misfits, ibootstrap)
isort = num.argsort(bms)
ibests_list.append(isort[:nbest])
ibests.append(isort[0])
......@@ -731,7 +730,7 @@ class ResultStats(Object):
def make_stats(problem, xs, misfits, pnames=None):
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
ibest = num.argmin(gms)
rs = ResultStats(problem=problem)
if pnames is None:
......@@ -854,7 +853,7 @@ def export(what, rundirs, type=None, pnames=None, filename=None):
dump(x_mean, gm_mean, indices)
elif what == 'ensemble':
gms = problem.global_misfits(misfits)
gms = problem.combine_misfits(misfits)
isort = num.argsort(gms)
for i in isort:
dump(xs[i], gms[i], indices)
......
......@@ -64,6 +64,10 @@ class SamplerStartingPointChoice(StringChoice):
choices = ['excentricity_compensated', 'random', 'mean']
class BootstrapTypeChoice(StringChoice):
choices = ['bayesian', 'classic']
class SamplerPhase(Object):
niterations = Int.T()
ntries_preconstrain_limit = Int.T(default=1000)
......@@ -221,8 +225,31 @@ class DirectedSamplerPhase(SamplerPhase):
return x
def make_bootstrap_weights(nbootstrap, ntargets, type='bayesian', seed=None):
ws = num.zeros((nbootstrap, ntargets))
rstate = num.random.RandomState(seed)
for ibootstrap in range(nbootstrap):
if type == 'classic':
ii = rstate.randint(0, ntargets, size=ntargets)
ws[ibootstrap, :] = num.histogram(
ii, ntargets, (-0.5, ntargets - 0.5))[0]
elif type == 'bayesian':
f = rstate.uniform(0., 1., size=ntargets+1)
f[0] = 0.
f[-1] = 1.
f = num.sort(f)
g = f[1:] - f[:-1]
ws[ibootstrap, :] = g * ntargets
else:
assert False
return ws
class Chains(object):
def __init__(self, problem, history, nchains, nlinks_cap):
def __init__(
self, problem, history, nchains, nlinks_cap,
bootstrap_weights):
self.problem = problem
self.history = history
......@@ -236,11 +263,12 @@ class Chains(object):
self.accept_sum = num.zeros(nchains, dtype=num.int)
history.add_listener(self)
def append(self, iiter, model, misfits):
self.bootstrap_weights = num.vstack((
num.ones((1, self.problem.ntargets)),
bootstrap_weights))
gm = self.problem.global_misfit(misfits)
bms = self.problem.bootstrap_misfit(misfits, self.nchains - 1)
gbms = num.concatenate(([gm], bms))
def append(self, iiter, model, misfits):
gbms = self.problem.combine_misfits(misfits, self.bootstrap_weights)
self.chains_m[:, self.nlinks] = gbms
self.chains_i[:, self.nlinks] = iiter
......@@ -307,27 +335,61 @@ class HighScoreOptimizer(Optimizer):
sampler_phases = List.T(SamplerPhase.T())
chain_length_factor = Float.T(default=8.)
nbootstrap = Int.T(default=10)
bootstrap_type = BootstrapTypeChoice.T(default='bayesian')
bootstrap_seed = Int.T(default=23)
def optimize(self, problem, rundir=None):
def __init__(self, **kwargs):
Optimizer.__init__(self, **kwargs)
self._bootstrap_weights = {}
if rundir is not None:
self.dump(filename=op.join(rundir, 'optimizer.yaml'))
def get_bootstrap_weights(self, problem):
k = (problem,
self.nbootstrap,
self.bootstrap_type,
self.bootstrap_seed)
niter = sum(phase.niterations for phase in self.sampler_phases)
if k not in self._bootstrap_weights:
self._bootstrap_weights[k] = make_bootstrap_weights(
self.nbootstrap, problem.ntargets,
type=self.bootstrap_type,
seed=self.bootstrap_seed)
iiter = 0
return self._bootstrap_weights[k]
isbad_mask = None
def bootstrap_misfits(self, problem, misfits, ibootstrap):
bweights = self.get_bootstrap_weights(problem)
bms = problem.combine_misfits(
misfits,
extra_weights=bweights[ibootstrap:ibootstrap+1, :])[:, 0]
return bms
def setup_chains(self, problem, history):
bootstrap_weights = self.get_bootstrap_weights(problem)
nlinks_cap = int(round(
self.chain_length_factor * problem.nparameters + 1))
return Chains(
problem, history, self.nbootstrap+1, nlinks_cap,
bootstrap_weights=bootstrap_weights)
def optimize(self, problem, rundir=None):
if rundir is not None:
self.dump(filename=op.join(rundir, 'optimizer.yaml'))
history = ModelHistory(problem, path=rundir, mode='w')
chains = Chains(problem, history, self.nbootstrap+1, nlinks_cap)
chains = self.setup_chains(problem, history)
phases = list(self.sampler_phases)
phase = phases.pop(0)
niter = sum(phase.niterations for phase in self.sampler_phases)
isbad_mask = None
iiter_phase_start = 0
iiter = 0
while iiter < niter:
if iiter - iiter_phase_start == phase.niterations:
......
......@@ -136,7 +136,7 @@ def draw_sequence_figures(
models = history.models
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
gms_softclip = num.where(gms > 1.0, 0.2 * num.log10(gms) + 1.0, gms)
isort = num.argsort(gms)[::-1]
......@@ -308,10 +308,10 @@ def draw_jointpar_figures(
xref = problem.get_xref()
if ibootstrap is not None:
gms = problem.bootstrap_misfits(
history.misfits, optimizer.nbootstrap, ibootstrap)
gms = optimizer.bootstrap_misfits(
problem, history.misfits, ibootstrap)
else:
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
isort = num.argsort(gms)[::-1]
......@@ -507,7 +507,7 @@ def draw_solution_figure(
logger.warn('empty models vector')
return []
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
isort = num.argsort(gms)
iorder = num.empty_like(isort)
iorder[isort] = num.arange(iorder.size)[::-1]
......@@ -645,7 +645,7 @@ def draw_contributions_figure(history, optimizer, plt):
imodels = num.arange(history.nmodels)
gms = problem.global_misfits(history.misfits)**problem.norm_exponent
gms = problem.combine_misfits(history.misfits)**problem.norm_exponent
isort = num.argsort(gms)[::-1]
......@@ -748,7 +748,7 @@ def draw_bootstrap_figure(history, optimizer, plt):
fig = plt.figure()
problem = history.problem
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
imodels = num.arange(history.nmodels)
......@@ -758,8 +758,9 @@ def draw_bootstrap_figure(history, optimizer, plt):
ibests = []
for ibootstrap in range(optimizer.nbootstrap):
bms = problem.bootstrap_misfits(
history.misfits, optimizer.nbootstrap, ibootstrap)
bms = optimizer.bootstrap_misfits(
problem, history.misfits, ibootstrap)
isort_bms = num.argsort(bms)[::-1]
ibests.append(isort_bms[-1])
......@@ -891,7 +892,7 @@ def draw_fits_figures_statics(ds, history, optimizer, plt):
for target in problem.targets:
target.set_dataset(ds)
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
isort = num.argsort(gms)
gms = gms[isort]
models = history.models[isort, :]
......@@ -1019,7 +1020,7 @@ def draw_fits_ensemble_figures(
target_index = dict(
(target, i) for (i, target) in enumerate(problem.targets))
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
isort = num.argsort(gms)[::-1]
gms = gms[isort]
models = history.models[isort, :]
......@@ -1395,7 +1396,7 @@ def draw_fits_figures(ds, history, optimizer, plt):
target_index = dict(
(target, i) for (i, target) in enumerate(problem.waveform_targets))
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
isort = num.argsort(gms)
gms = gms[isort]
models = history.models[isort, :]
......@@ -1920,7 +1921,7 @@ def draw_location_figure(history, optimizer, plt):
bounds = problem.get_combined_bounds()
gms = problem.global_misfits(history.misfits)
gms = problem.combine_misfits(history.misfits)
isort = num.argsort(gms)[::-1]
......
from __future__ import print_function
import numpy as num
import math
import copy
......@@ -45,7 +46,7 @@ class Problem(Object):
self._bootstrap_weights = None
self._target_weights = None
self._engine = None
self._group_mask = None
self._family_mask = None
if hasattr(self, 'problem_waveform_parameters') and self.has_waveforms:
self.problem_parameters =\
......@@ -188,7 +189,14 @@ class Problem(Object):
self._engine = engine
def random_uniform(self, xbounds):
raise NotImplementedError()
x = []
for i in xrange(self.nparameters):
x.append(num.random.uniform(xbounds[i, 0], xbounds[i, 1]))
return num.array(x, dtype=num.float)
def preconstrain(self, x):
return x
def extract(self, xs, i):
if xs.ndim == 1:
......@@ -200,37 +208,6 @@ class Problem(Object):
return self.make_dependant(
xs, self.dependants[i-self.nparameters].name)
def make_bootstrap_weights(self, nbootstrap, type='classic'):
ntargets = self.ntargets
ws = num.zeros((nbootstrap, ntargets))
rstate = num.random.RandomState(23)
for ibootstrap in range(nbootstrap):
if type == 'classic':
ii = rstate.randint(0, ntargets, size=self.ntargets)
ws[ibootstrap, :] = num.histogram(
ii, ntargets, (-0.5, ntargets - 0.5))[0]
elif type == 'bayesian':
f = rstate.uniform(0., 1., size=self.ntargets+1)
f[0] = 0.
f[-1] = 1.
f = num.sort(f)
g = f[1:] - f[:-1]
ws[ibootstrap, :] = g * ntargets
else:
assert False
return ws
def get_bootstrap_weights(self, nbootstrap, ibootstrap=None):
if self._bootstrap_weights is None:
self._bootstrap_weights = self.make_bootstrap_weights(
nbootstrap, type='bayesian')
if ibootstrap is None:
return self._bootstrap_weights
else:
return self._bootstrap_weights[ibootstrap, :]
def get_target_weights(self):
if self._target_weights is None:
self._target_weights = num.array(
......@@ -240,25 +217,30 @@ class Problem(Object):
return self._target_weights
def inter_group_weights(self, ns):
def inter_family_weights(self, ns):
exp, root = self.get_norm_functions()
group, ngroups = self.get_group_mask()
family, nfamilies = self.get_family_mask()
ws = num.zeros(self.ntargets)
for igroup in range(ngroups):
mask = group == igroup
for ifamily in range(nfamilies):
mask = family == ifamily
ws[mask] = 1.0 / root(num.nansum(exp(ns[mask])))
return ws
def inter_group_weights2(self, ns):
def inter_family_weights2(self, ns):
'''
:param ns: 2D array with normalization factors ``ns[imodel, itarget]``
:returns: 2D array ``weights[imodel, itarget]``
'''
exp, root = self.get_norm_functions()
group, ngroups = self.get_group_mask()
family, nfamilies = self.get_family_mask()
ws = num.zeros(ns.shape)
for igroup in range(ngroups):
mask = group == igroup
for ifamily in range(nfamilies):
mask = family == ifamily
ws[:, mask] = (1.0 / root(
num.nansum(exp(ns[:, mask]), axis=1)))[:, num.newaxis]
......@@ -307,87 +289,46 @@ class Problem(Object):
else:
self.raise_invalid_norm_exponent()
def bootstrap_misfit(self, misfits, nbootstrap, ibootstrap=None):
'''
:param misifts: 2D array with target misfits ``misfits[itarget, 0]``
and normalization factors ``misfits[itarget, 1]``
:returns: 1D array ``misfits[ibootstrap]`` if ``ibootstrap`` is
``None``
'''
exp, root = self.get_norm_functions()
ms = misfits[:, 0]
ns = misfits[:, 1]
def combine_misfits(
self, misfits,
extra_weights=None,
get_contributions=False):
w = self.get_bootstrap_weights(nbootstrap, ibootstrap) * \
self.get_target_weights() * self.inter_group_weights(ns)
if ibootstrap is None:
return root(
num.nansum(exp(w*ms[num.newaxis, :]), axis=1) /
num.nansum(exp(w*ns[num.newaxis, :]), axis=1))
return root(num.nansum(exp(w*ms)) / num.nansum(exp(w*ns)))
def bootstrap_misfits(self, misfits, nbootstrap, ibootstrap=None):
'''
:param misifts: 3D array with target misfits
``misfits[imodel, itarget, 0]`` and normalization factors
``misfits[imodel, itarget, 1]``
:returns: 2D array ``misfits[imodel, ibootstrap]``
'''
exp, root = self.get_norm_functions()
w = self.get_bootstrap_weights(
nbootstrap, ibootstrap)[num.newaxis, :] * \
self.get_target_weights()[num.newaxis, :] * \
self.inter_group_weights2(misfits[:, :, 1])
bms = root(num.nansum(exp(w*misfits[:, :, 0]), axis=1) /
num.nansum(exp(w*misfits[:, :, 1]), axis=1))
if misfits.ndim == 2:
misfits = misfits[num.newaxis, :, :]
return self.combine_misfits(misfits, extra_weights)[0, ...]
return bms
assert misfits.ndim == 3
assert extra_weights is None or extra_weights.ndim == 2
def global_misfit(self, misfits):
'''
:param misifts: 2D array with target misfits ``misfits[itarget, 0]``
and normalization factors ``misfits[itarget, 1]``
:returns: scalar global misfit
'''
exp, root = self.get_norm_functions()
if extra_weights is not None:
w = extra_weights[num.newaxis, :, :] \
* self.get_target_weights()[num.newaxis, num.newaxis, :] \
* self.inter_family_weights2(
misfits[:, :, 1])[:, num.newaxis, :]
ws = self.get_target_weights() * \
self.inter_group_weights(misfits[:, 1])
if get_contributions:
return exp(w*misfits[:, num.newaxis, :, 0]) \
/ num.nansum(exp(w*misfits[:, num.newaxis, :, 1]), axis=2)
return root(num.nansum(exp(ws*misfits[:, 0])) /
num.nansum(exp(ws*misfits[:, 1])))
def global_misfits(self, misfits):
'''
:param misifts: 3D array with target misfits
``misfits[imodel, itarget, 0]`` and normalization factors
``misfits[imodel, itarget, 1]``
:returns: 1D array ``global_misfits[imodel]``
'''
exp, root = self.get_norm_functions()
ws = self.get_target_weights()[num.newaxis, :] * \
self.inter_group_weights2(misfits[:, :, 1])
return root(num.nansum(exp(ws*misfits[:, :, 0]), axis=1) /
num.nansum(exp(ws*misfits[:, :, 1]), axis=1))
def global_contributions(self, misfits):
exp, root = self.get_norm_functions()
ws = self.get_target_weights()[num.newaxis, :] * \
self.inter_group_weights2(misfits[:, :, 1])
return root(
num.nansum(exp(w*misfits[:, num.newaxis, :, 0]), axis=2) /
num.nansum(exp(w*misfits[:, num.newaxis, :, 1]), axis=2))
else:
w = self.get_target_weights()[num.newaxis, :] \
* self.inter_family_weights2(misfits[:, :, 1])
gcms = exp(ws*misfits[:, :, 0]) / \
num.nansum(exp(ws*misfits[:, :, 1]), axis=1)[:, num.newaxis]
if get_contributions:
return exp(w*misfits[:, :, 0]) \
/ num.nansum(exp(w*misfits[:, :, 1]), axis=1)
return gcms
return root(
num.nansum(exp(w*misfits[:, :, 0]), axis=1) /
num.nansum(exp(w*misfits[:, :, 1]), axis=1))
def make_group_mask(self):
def make_family_mask(self):
family_names = set()
families = num.zeros(len(self.targets), dtype=num.int)
......@@ -397,11 +338,11 @@ class Problem(Object):
return families, len(family_names)
def get_group_mask(self):
if self._group_mask is None:
self._group_mask = self.make_group_mask()
def get_family_mask(self):
if self._family_mask is None:
self._family_mask = self.make_family_mask()
return self._group_mask
return self._family_mask
def evaluate(self, x, dump_data=False):
raise NotImplementedError()
......
......@@ -105,7 +105,7 @@ class MisfitTarget(Object):
raise NotImplementedError()
def get_combined_weight(self, apply_balancing_weights=False):
raise NotImplementedError()
return 1.0
__all__ = '''
......
import numpy as num
from pyrocko.guts import Object, Float, Dict, List, String
from pyrocko import gf
from grond import Problem, Parameter, MisfitTarget
guts_prefix = 'grond.toy'
class ToyTarget(MisfitTarget):
north = Float.T()
east = Float.T()
depth = Float.T()
obs_distance = Float.T()
class ToySource(Object):
north = Float.T()
east = Float.T()
depth = Float.T()
class ToyProblem(Problem):
problem_parameters = [
Parameter('north', 'm', label='North'),
Parameter('east', 'm', label='East'),
Parameter('depth', 'm', label='Depth')]
ranges = Dict.T(String.T(), gf.Range.T())
targets = List.T(ToyTarget.T())
base_source = ToySource.T()
def __init__(self, **kwargs):
Problem.__init__(self, **kwargs)
self._xtargets = None
self._obs_distances = None
def pack(self, source):
return num.array(
[source.north, source.east, source.depth], dtype=num.float)
def _setup_modelling(self):
if self._xtargets is None:
self._xtargets = num.array(
[(t.north, t.east, t.depth) for t in self.targets],
dtype=num.float)
self._obs_distances = num.array(
[t.obs_distance for t in self.targets],
dtype=num.float)
def evaluate(self, x, mask=None):
self._setup_modelling()
distances = num.sqrt(
num.sum((x[num.newaxis, :]-self._xtargets)**2, axis=1))
misfits = num.zeros((self.ntargets, 2))
misfits[:, 0] = num.abs(distances - self._obs_distances)
misfits[:, 1] = num.ones(self.ntargets) \
* num.mean(num.abs(self._obs_distances))
return misfits
def evaluate_many(self, xs):
self._setup_modelling()
distances = num.sqrt(
num.sum(
(xs[:, num.newaxis, :]-self._xtargets[num.newaxis, :])**2,
axis=2))
misfits = num.zeros((xs.shape[0], self.ntargets, 2))
misfits[:, :, 0] = num.abs(