Commit 0ead9fe5 authored by Sebastian Heimann

rewrite/improved optimizer with history container (ProblemData)

parent 092f75f3
......@@ -15,7 +15,7 @@ setup(
version='0.1',
author='Sebastian Heimann',
author_email='sebastian.heimann@gfz-potsdam.de',
packages=['grond', 'grond.baraddur', 'grond.problems', 'grond.solvers',
packages=['grond', 'grond.baraddur', 'grond.problems', 'grond.optimizers',
'grond.analysers', 'grond.listeners', 'grond.targets'],
python_requires='>=3.5',
scripts=['apps/grond'],
......
......@@ -5,6 +5,6 @@ from .targets import * # noqa
from .meta import * # noqa
from .synthetic_tests import * # noqa
from .solvers import * # noqa
from .optimizers import * # noqa
__version__ = '0.2'
from .base import *
from .base import * # noqa
......@@ -57,7 +57,7 @@ class Analyser(object):
isok_mask = num.logical_not(isbad_mask)
else:
isok_mask = None
_, ms = wproblem.evaluate(x, mask=isok_mask)
ms = wproblem.evaluate(x, mask=isok_mask)[:, 1]
mss[iiter, :] = ms
isbad_mask = num.isnan(ms)
......@@ -80,10 +80,10 @@ class Analyser(object):
class AnalyserConfig(Object):
niter = Int.T(default=1000)
niterations = Int.T(default=1000)
def get_analyser(self):
return Analyser(niter=self.niter)
return Analyser(niter=self.niterations)
__all__ = '''
......
......@@ -14,13 +14,15 @@ from pyrocko import orthodrome as od, gf, trace, guts, util, weeding
from pyrocko import parimap, model, marker as pmarker
from .dataset import DatasetConfig, NotFound
from .problems.base import ProblemConfig, Problem
from .solvers.base import SolverConfig
from .problems.base import ProblemConfig, Problem, \
load_problem_info_and_data, load_problem_data
from .optimizers.base import OptimizerConfig, BadProblem
from .targets.base import TargetGroup
from .analysers.base import AnalyserConfig
from .listeners import TerminalListener
from .meta import Path, HasPaths, expand_template, xjoin, GrondError, \
Notifier, Forbidden
from .meta import Path, HasPaths, expand_template, GrondError, Notifier, \
Forbidden
logger = logging.getLogger('grond.core')
guts_prefix = 'grond'
......@@ -57,10 +59,6 @@ def weed(origin, targets, limit, neighborhood=3):
return targets_weeded, meandists_kept, deleted
class BadProblem(Exception):
pass
class EngineConfig(HasPaths):
gf_stores_from_pyrocko_config = Bool.T(default=True)
gf_store_superdirs = List.T(Path.T())
......@@ -87,7 +85,7 @@ class Config(HasPaths):
target_groups = List.T(TargetGroup.T())
problem_config = ProblemConfig.T()
analyser_config = AnalyserConfig.T(default=AnalyserConfig.D())
solver_config = SolverConfig.T(default=SolverConfig.D())
optimizer_config = OptimizerConfig.T()
engine_config = EngineConfig.T(default=EngineConfig.D())
def __init__(self, *args, **kwargs):
......@@ -128,75 +126,6 @@ def sarr(a):
return ' '.join('%15g' % x for x in a)
def load_config(dirname):
    '''
    Load the object stored in ``config.yaml`` inside a directory.

    :param dirname: directory which contains the ``config.yaml`` file
    :returns: the object built by :py:func:`guts.load` from that file
    '''
    config_path = op.join(dirname, 'config.yaml')
    config = guts.load(filename=config_path)
    return config
def load_problem_info_and_data(dirname, subset=None):
    '''
    Load a problem description together with its stored models and misfits.

    :param dirname: directory holding ``problem.yaml``
    :param subset: optional name combined with ``dirname`` through
        ``xjoin`` to locate the data files (presumably a subdirectory --
        confirm against ``xjoin``)
    :returns: tuple ``(problem, xs, misfits)``
    '''
    problem = load_problem_info(dirname)
    data_dirname = xjoin(dirname, subset)
    xs, misfits = load_problem_data(data_dirname, problem)
    return problem, xs, misfits
def load_problem_info(dirname):
    '''
    Load the object stored in ``problem.yaml`` inside a directory.

    :param dirname: directory which contains the ``problem.yaml`` file
    :returns: the object built by :py:func:`guts.load` from that file
    '''
    return guts.load(filename=op.join(dirname, 'problem.yaml'))
def load_optimizer_history(dirname, problem):
    '''
    Read the optimizer bookkeeping files ``accepted`` and ``choices``.

    ``accepted`` is read as little-endian 1-byte integers with
    ``problem.nbootstrap + 1`` values per model, ``choices`` as little-endian
    8-byte integers with two values per model.

    :param dirname: directory containing the two files
    :param problem: object providing the ``nbootstrap`` attribute
    :returns: tuple ``(ibootstrap_choices, imodel_choices, accepted)`` where
        the first two are 1-D integer arrays with one entry per model and
        ``accepted`` is a boolean array of shape
        ``(nmodels, nbootstrap + 1)``
    '''
    ncolumns = problem.nbootstrap + 1

    # Both files hold raw binary records, hence binary mode.
    fn = op.join(dirname, 'accepted')
    with open(fn, 'rb') as f:
        # One byte per value: the file size gives the number of models.
        nmodels = os.fstat(f.fileno()).st_size // ncolumns
        data1 = num.fromfile(
            f,
            dtype='<i1',
            count=nmodels*ncolumns).astype(bool)

    accepted = data1.reshape((nmodels, ncolumns))

    fn = op.join(dirname, 'choices')
    with open(fn, 'rb') as f:
        data2 = num.fromfile(
            f,
            dtype='<i8',
            count=nmodels*2).astype(num.int64)

    ibootstrap_choices, imodel_choices = data2.reshape((nmodels, 2)).T
    return ibootstrap_choices, imodel_choices, accepted
def load_problem_data(dirname, problem, skip_models=0):
    '''
    Read stored models and misfits from the ``models`` and ``misfits`` files.

    Both files are read as little-endian 8-byte floats. ``models`` holds
    ``problem.nparameters`` values per model. ``misfits`` holds
    ``2 * problem.ntargets`` values per model: the first ``ntargets`` values
    end up in ``misfits[:, :, 0]`` and the following ``ntargets`` values in
    ``misfits[:, :, 1]``.

    :param dirname: directory containing the two files
    :param problem: object providing ``nparameters`` and ``ntargets``
    :param skip_models: number of leading models to skip in both files
    :returns: tuple ``(xs, misfits)`` with shapes ``(nmodels, nparameters)``
        and ``(nmodels, ntargets, 2)``
    '''
    nparameters = problem.nparameters
    ntargets = problem.ntargets

    # The files hold raw binary records, hence binary mode.
    fn = op.join(dirname, 'models')
    with open(fn, 'rb') as f:
        nmodels = os.fstat(f.fileno()).st_size // (nparameters * 8)
        # Never request a negative count when more models are skipped than
        # are available.
        nmodels = max(0, nmodels - skip_models)
        f.seek(skip_models * nparameters * 8)
        data1 = num.fromfile(
            f, dtype='<f8',
            count=nmodels * nparameters).astype(float)

    # data1 starts after the skipped models, so skip_models must not be
    # subtracted a second time here.
    nmodels = data1.size // nparameters
    xs = data1[:nmodels * nparameters].reshape((nmodels, nparameters))

    fn = op.join(dirname, 'misfits')
    with open(fn, 'rb') as f:
        f.seek(skip_models * ntargets * 2 * 8)
        data2 = num.fromfile(
            f, dtype='<f8', count=nmodels*ntargets*2).astype(float)

    data2 = data2.reshape((nmodels, ntargets*2))

    # Interleave the two halves of every record so that the final reshape
    # pairs value k of the first half with value k of the second half.
    combi = num.empty_like(data2)
    combi[:, 0::2] = data2[:, :ntargets]
    combi[:, 1::2] = data2[:, ntargets:]

    misfits = combi.reshape((nmodels, ntargets, 2))

    return xs, misfits
def get_mean_x(xs):
    '''
    Average ``xs`` along its first axis.

    :param xs: array-like accepted by :py:func:`numpy.mean`
    :returns: result of ``numpy.mean(xs, axis=0)``
    '''
    mean_x = num.mean(xs, axis=0)
    return mean_x
......@@ -264,26 +193,6 @@ def write_config(config, path):
config.change_basepath(basepath)
def bootstrap_outliers(problem, misfits, std_factor=1.0):
    '''
    Identify bootstrap configurations performing bad in global configuration

    The function is disabled: it raises unconditionally before doing any
    work. The statements after the ``raise`` are never executed and are only
    kept as a starting point for a repair. Note that ``std_factor`` is not
    used by them.

    :raises Exception: always
    '''

    raise Exception('this function is broken')

    global_misfits = problem.global_misfits(misfits)

    best_indices = [
        num.argmin(problem.bootstrap_misfits(misfits, ibootstrap))
        for ibootstrap in range(problem.nbootstrap)]

    best_global_misfits = global_misfits[best_indices]
    threshold = num.median(best_global_misfits) + num.std(best_global_misfits)

    return num.where(global_misfits > threshold)[0]
def forward(rundir_or_config_path, event_names):
if not event_names:
......@@ -326,7 +235,7 @@ def forward(rundir_or_config_path, event_names):
events = []
for (problem, x) in payload:
ds.empty_cache()
ms, ns, results = problem.evaluate(x, result_mode='full')
_, results = problem.evaluate(x, result_mode='full')
event = problem.get_source(x).pyrocko_event()
events.append(event)
......@@ -352,7 +261,9 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
else:
xs, misfits = load_problem_data(rundir, problem)
config = load_config(rundir)
optimizer_fn = op.join(rundir, 'optimizer.yaml')
optimizer = guts.load(filename=optimizer_fn)
nbootstrap = optimizer.nbootstrap
dumpdir = op.join(rundir, 'harvest')
if op.exists(dumpdir):
......@@ -371,9 +282,8 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
ibests_list.append(isort[:nbest])
if weed != 3:
for ibootstrap in range(config.solver_config.nbootstrap):
bms = problem.bootstrap_misfits(
misfits, config.solver_config.nbootstrap, ibootstrap)
for ibootstrap in range(nbootstrap):
bms = problem.bootstrap_misfits(misfits, nbootstrap, ibootstrap)
isort = num.argsort(bms)
ibests_list.append(isort[:nbest])
ibests.append(isort[0])
......@@ -397,10 +307,7 @@ def harvest(rundir, problem=None, nbest=10, force=False, weed=0):
ibests = ibests[gms[ibests] < mean_gm_best]
for i in ibests:
x = xs[i]
ms = misfits[i, :, 0]
ns = misfits[i, :, 1]
problem.dump_problem_data(dumpdir, x, ms, ns)
problem.dump_problem_data(dumpdir, xs[i], misfits[i, :, :])
def get_event_names(config):
......@@ -455,7 +362,7 @@ def check(
if n_random_synthetics == 0:
x = problem.pack(problem.base_source)
sources.append(problem.base_source)
ms, ns, results = problem.evaluate(x, result_mode='full')
_, results = problem.evaluate(x, result_mode='full')
results_list.append(results)
else:
......@@ -470,7 +377,7 @@ def check(
pass
sources.append(problem.get_source(x))
ms, ns, results = problem.evaluate(x, result_mode='full')
_, results = problem.evaluate(x, result_mode='full')
results_list.append(results)
if show_waveforms:
......@@ -691,9 +598,6 @@ def process_event(ievent, g_data_id):
config, force, status, nparallel, event_names = g_state[g_data_id]
if nparallel > 1:
status = ()
event_name = event_names[ievent]
ds = config.get_dataset(event_name)
......@@ -756,14 +660,19 @@ def process_event(ievent, g_data_id):
# movie_filename='grond_opt_time_magnitude.mp4')
try:
solver = config.solver_config.get_solver()
solver.solve(
optimizer = config.optimizer_config.get_optimizer()
if xs_inject is not None:
    from .optimizers import highscore

    # isinstance() needs the class itself as second argument; handing it an
    # instance raises TypeError instead of performing the type check.
    if not isinstance(optimizer, highscore.HighScoreOptimizer):
        raise GrondError(
            'optimizer does not support injections')

    # Slice assignment at [0:0] inserts the injection phase in front of the
    # sampler phases which are already configured.
    optimizer.sampler_phases[0:0] = [
        highscore.InjectionSamplerPhase(xs_inject=xs_inject)]
optimizer.optimize(
problem,
rundir=rundir,
status=status,
# plot=splot,
xs_inject=xs_inject,
notifier=notifier)
rundir=rundir)
harvest(rundir, problem, force=True)
......@@ -947,9 +856,6 @@ def export(what, rundirs, type=None, pnames=None, filename=None):
__all__ = '''
EngineConfig
Config
load_problem_info
load_problem_info_and_data
load_optimizer_history
read_config
write_config
forward
......
from __future__ import print_function
import glob
import copy
import logging
......
......@@ -6,11 +6,22 @@ from collections import OrderedDict
from pyrocko.guts import Object
from ..meta import GrondError
guts_prefix = 'grond'
logger = logging.getLogger('grond.solver')
class BadProblem(GrondError):
pass
class SimpleTimedelta(timedelta):
    '''
    :py:class:`timedelta` whose string form is cut at the first ``'.'``.

    This drops the fractional-seconds part which the standard string
    representation appends after the seconds.
    '''

    def __str__(self):
        full = super(SimpleTimedelta, self).__str__()
        head, _, _ = full.partition('.')
        return head
class RingBuffer(num.ndarray):
def __new__(cls, *args, **kwargs):
cls = num.ndarray.__new__(cls, *args, **kwargs)
......@@ -70,23 +81,18 @@ class SolverState(object):
return len(self.parameter_names)
class Solver(object):
state = SolverState()
class Optimizer(Object):
def solve(
self, problem, rundir=None, status=(), plot=None, xs_inject=None,
notifier=None):
def optimize(self, problem):
    '''
    Run the optimization for ``problem``.

    This base implementation does no work and only signals that the method
    has not been provided.

    :param problem: the problem to optimize
    :raises NotImplementedError: always
    '''
    # NotImplemented is the sentinel for binary special methods and is not
    # callable; the exception class for this purpose is NotImplementedError.
    raise NotImplementedError()
class SolverConfig(Object):
def get_solver(self):
return Solver()
class OptimizerConfig(Object):
pass
__all__ = '''
Solver
SolverState
SolverConfig
BadProblem
Optimizer
OptimizerConfig
'''.split()
This diff is collapsed.
This diff is collapsed.
from .cmt import CMTProblem, CMTProblemConfig # noqa
from .rectangular import RectangularProblem, RectangularProblemConfig # noqa
from .double_dc import DoubleDCProblem, DoubleDCProblemConfig # noqa
from .base import * # noqa
from .cmt import * # noqa
from .rectangular import * # noqa
from .double_dc import * # noqa
import numpy as num
import math
import copy
import logging
import os.path as op
import os
from pyrocko import gf, util, guts
from pyrocko.guts import Object, String, Bool, List, Dict, Int
from ..meta import ADict, Parameter, GrondError
from ..meta import ADict, Parameter, GrondError, xjoin
from ..targets import WaveformMisfitTarget, SatelliteMisfitTarget
......@@ -16,6 +18,10 @@ km = 1e3
as_km = dict(scale_factor=km, scale_unit='km')
def nextpow2(i):
    '''
    Get the smallest power of two which is greater than or equal to ``i``.

    Integer arithmetic is used, so exact powers of two are returned
    unchanged; a ratio of floating point logarithms can round up past them.
    Values of one or below (including zero) give ``1``.

    :param i: non-negative number
    :returns: power of two as :py:class:`int`
    :raises ValueError: if ``i`` is negative
    '''
    if i < 0:
        raise ValueError('nextpow2() requires a non-negative argument')

    n = int(math.ceil(i))
    if n <= 1:
        return 1

    # (n - 1).bit_length() is the exponent of the smallest power of two
    # which is >= n.
    return 1 << (n - 1).bit_length()
class ProblemConfig(Object):
name_template = String.T()
apply_balancing_weights = Bool.T(default=True)
......@@ -82,7 +88,7 @@ class Problem(Object):
guts.dump(self, filename=fn)
def dump_problem_data(
self, dirname, x, ms, ns,
self, dirname, x, misfits,
accept=None, ibootstrap_choice=None, ibase=None):
fn = op.join(dirname, 'models')
......@@ -93,8 +99,7 @@ class Problem(Object):
fn = op.join(dirname, 'misfits')
with open(fn, 'ab') as f:
ms.astype('<f8').tofile(f)
ns.astype('<f8').tofile(f)
misfits.astype('<f8').tofile(f)
if None not in (ibootstrap_choice, ibase):
fn = op.join(dirname, 'choices')
......@@ -184,6 +189,16 @@ class Problem(Object):
def random_uniform(self, xbounds):
raise NotImplementedError()
def extract(self, xs, i):
    '''
    Get the values of parameter or dependant number ``i`` from ``xs``.

    Indices below ``self.nparameters`` select column ``i`` of ``xs``. Higher
    indices select the entry ``i - self.nparameters`` of ``self.dependants``,
    whose values are obtained with ``self.make_dependant``.

    :param xs: 2-D array with one row per model, or a 1-D array holding a
        single model
    :param i: index into parameters followed by dependants
    :returns: one value per row of ``xs``; for 1-D ``xs`` the single
        corresponding element
    '''
    if xs.ndim == 1:
        # Promote the single model to one row, then unwrap the result.
        values = self.extract(xs[num.newaxis, :], i)
        return values[0]

    if i >= self.nparameters:
        dependant = self.dependants[i-self.nparameters]
        return self.make_dependant(xs, dependant.name)

    return xs[:, i]
def make_bootstrap_weights(self, nbootstrap, type='classic'):
ntargets = self.ntargets
ws = num.zeros((nbootstrap, ntargets))
......@@ -265,7 +280,12 @@ class Problem(Object):
return num.array(out, dtype=num.float)
def get_dependant_bounds(self):
return None
return num.zeros((0, 2))
def get_combined_bounds(self):
    '''
    Get the parameter bounds followed by the dependant bounds.

    :returns: result of stacking ``self.get_parameter_bounds()`` on top of
        ``self.get_dependant_bounds()`` with :py:func:`numpy.vstack`
    '''
    parameter_bounds = self.get_parameter_bounds()
    dependant_bounds = self.get_dependant_bounds()
    return num.vstack((parameter_bounds, dependant_bounds))
def raise_invalid_norm_exponent(self):
    '''
    Report that ``self.norm_exponent`` has an unsupported value.

    :raises GrondError: always, with the offending value in the message
    '''
    # The message needs a conversion specifier: without one the % operator
    # fails with TypeError before the GrondError is created. The value is
    # wrapped in a tuple so that it is always formatted as a single item.
    raise GrondError(
        'invalid norm exponent: %s' % (self.norm_exponent,))
......@@ -286,16 +306,20 @@ class Problem(Object):
else:
self.raise_invalid_norm_exponent()
def bootstrap_misfit(self, ms, ns, nbootstrap, ibootstrap=None):
def bootstrap_misfit(self, misfits, nbootstrap, ibootstrap=None):
exp, root = self.get_norm_functions()
w = self.get_target_weights() * self.inter_group_weights(ns)
ms = misfits[:, 0]
ns = misfits[:, 1]
w = self.get_bootstrap_weights(nbootstrap, ibootstrap) * \
self.get_target_weights() * self.inter_group_weights(ns)
if ibootstrap is None:
return root(
num.nansum(exp(w*ms[num.newaxis, :]), axis=1) /
num.nansum(exp(w*ns[num.newaxis, :]), axis=1))
w *= self.get_bootstrap_weights(nbootstrap, ibootstrap)
return root(num.nansum(exp(w*ms)) / num.nansum(exp(w*ns)))
def bootstrap_misfits(self, misfits, nbootstrap, ibootstrap=None):
......@@ -319,20 +343,21 @@ class Problem(Object):
# bms = num.sqrt(num.nansum((w*misfits[:, :, 0])**2, axis=1))
return bms
def global_misfit(self, ms, ns):
def global_misfit(self, misfits):
exp, root = self.get_norm_functions()
ws = self.get_target_weights() * self.inter_group_weights(ns)
m = root(num.nansum(exp(ws*ms)) / num.nansum(exp(ws*ns)))
return m
ws = self.get_target_weights() * \
self.inter_group_weights(misfits[:, 1])
return root(num.nansum(exp(ws*misfits[:, 0])) /
num.nansum(exp(ws*misfits[:, 1])))
def global_misfits(self, misfits):
exp, root = self.get_norm_functions()
ws = self.get_target_weights()[num.newaxis, :] * \
self.inter_group_weights2(misfits[:, :, 1])
gms = root(num.nansum(exp(ws*misfits[:, :, 0]), axis=1) /
num.nansum(exp(ws*misfits[:, :, 1]), axis=1))
return gms
return root(num.nansum(exp(ws*misfits[:, :, 0]), axis=1) /
num.nansum(exp(ws*misfits[:, :, 1]), axis=1))
def global_contributions(self, misfits):
exp, root = self.get_norm_functions()
......@@ -380,3 +405,170 @@ class Problem(Object):
results.append(result)
return results
class ModelHistory(object):
    '''
    Growable in-memory record of models and their misfits.

    Models and misfits are kept in preallocated buffers of shape
    ``(capacity, problem.nparameters)`` and
    ``(capacity, problem.ntargets, 2)``. The attributes ``models`` and
    ``misfits`` are slices of these buffers covering the entries stored so
    far. Registered listeners are notified whenever entries are added.

    :param problem: object providing ``nparameters`` and ``ntargets``; in
        write mode also ``dump_problem_data``
    :param path: directory used to load or dump the data, or ``None``
    :param mode: ``'r'`` loads existing data from ``path`` on construction;
        ``'w'`` dumps every added model to ``path`` (if given)
    '''

    # Smallest buffer size (number of models) which is ever allocated.
    nmodels_capacity_min = 1024

    def __init__(self, problem, path=None, mode='r'):
        self.problem = problem
        self.path = path
        self._models_buffer = None
        self._misfits_buffer = None
        self.models = None
        self.misfits = None
        # Goes through the property setter and allocates the buffers.
        self.nmodels_capacity = self.nmodels_capacity_min
        self.listeners = []
        self.mode = mode

        if mode == 'r':
            models, misfits = load_problem_data(path, problem)
            self.extend(models, misfits)

    @property
    def nmodels(self):
        '''Number of models currently stored.'''
        if self.models is None:
            return 0

        return self.models.shape[0]

    @nmodels.setter
    def nmodels(self, nmodels_new):
        # Only truncation is possible here; growing is done by extend().
        assert 0 <= nmodels_new <= self.nmodels
        self.models = self._models_buffer[:nmodels_new, :]
        self.misfits = self._misfits_buffer[:nmodels_new, :, :]

    @property
    def nmodels_capacity(self):
        '''Number of models the current buffers can hold.'''
        if self._models_buffer is None:
            return 0

        return self._models_buffer.shape[0]

    @nmodels_capacity.setter
    def nmodels_capacity(self, nmodels_capacity_new):
        if self.nmodels_capacity == nmodels_capacity_new:
            return

        models_buffer = num.zeros(
            (nmodels_capacity_new, self.problem.nparameters),
            dtype=float)

        misfits_buffer = num.zeros(
            (nmodels_capacity_new, self.problem.ntargets, 2),
            dtype=float)

        # Carry over the entries stored so far, as far as they fit.
        ncopy = min(self.nmodels, nmodels_capacity_new)

        if self._models_buffer is not None:
            models_buffer[:ncopy, :] = \
                self._models_buffer[:ncopy, :]
            misfits_buffer[:ncopy, :, :] = \
                self._misfits_buffer[:ncopy, :, :]

        self._models_buffer = models_buffer
        self._misfits_buffer = misfits_buffer

    def clear(self):
        '''Drop all stored models and shrink to the minimum capacity.'''
        self.nmodels = 0
        self.nmodels_capacity = self.nmodels_capacity_min

    def extend(self, models, misfits):
        '''
        Add several models at once.

        In write mode with a path set, every added model is also dumped
        through ``problem.dump_problem_data``. Listeners receive an
        ``extend`` event with the previous number of models, the number of
        added models and the added data.

        :param models: array of shape ``(n, nparameters)``
        :param misfits: array of shape ``(n, ntargets, 2)``
        '''
        nmodels = self.nmodels
        n = models.shape[0]

        # Clamp to one so that an empty history extended by nothing does
        # not ask for the next power of two of zero.
        nmodels_capacity_want = max(
            self.nmodels_capacity_min, nextpow2(max(1, nmodels + n)))

        if nmodels_capacity_want != self.nmodels_capacity:
            self.nmodels_capacity = nmodels_capacity_want

        self._models_buffer[nmodels:nmodels+n, :] = models
        self._misfits_buffer[nmodels:nmodels+n, :, :] = misfits

        self.models = self._models_buffer[:nmodels+n, :]
        self.misfits = self._misfits_buffer[:nmodels+n, :, :]

        if self.path and self.mode == 'w':
            for i in range(n):
                self.problem.dump_problem_data(
                    self.path, models[i, :], misfits[i, :, :])

        self.emit('extend', nmodels, n, models, misfits)

    def append(self, model, misfits):
        '''
        Add a single model.

        Equivalent to :py:meth:`extend` with one-row arrays.

        :param model: array of shape ``(nparameters,)``
        :param misfits: array of shape ``(ntargets, 2)``
        '''
        self.extend(model[num.newaxis, :], misfits[num.newaxis, :, :])

    def add_listener(self, listener):
        '''Register an object whose methods are called on emitted events.'''
        self.listeners.append(listener)

    def emit(self, event_name, *args, **kwargs):
        '''Call the method named ``event_name`` on every listener.'''
        for listener in self.listeners:
            getattr(listener, event_name)(*args, **kwargs)
def load_problem_info_and_data(dirname, subset=None):
    '''
    Load a problem description together with its stored models and misfits.

    :param dirname: directory holding ``problem.yaml``
    :param subset: optional name combined with ``dirname`` through
        ``xjoin`` to locate the data files (presumably a subdirectory --
        confirm against ``xjoin``)
    :returns: tuple ``(problem, xs, misfits)``
    '''
    problem = load_problem_info(dirname)
    data_dirname = xjoin(dirname, subset)
    xs, misfits = load_problem_data(data_dirname, problem)
    return problem, xs, misfits
def load_problem_info(dirname):
    '''
    Load the object stored in ``problem.yaml`` inside a directory.

    :param dirname: directory which contains the ``problem.yaml`` file
    :returns: the object built by :py:func:`guts.load` from that file
    '''
    return guts.load(filename=op.join(dirname, 'problem.yaml'))
def load_problem_data(dirname, problem, skip_models=0):
fn = op.join(dirname, 'models')
with open(fn, 'r') as f:
nmodels = os.fstat(f.fileno()).st_size // (problem.nparameters * 8)
nmodels -= skip_models
f.seek(skip_models * problem.nparameters * 8)
data1 = num.fromfile(
f, dtype='<f8',
count=nmodels * problem.nparameters)\
.astype(num.float)