Commit 943fcfb9 authored by Marius Kriegerowski's avatar Marius Kriegerowski

started optimizer implementation

parent 10a6c31b
......@@ -5,12 +5,13 @@ from pyrocko.io import save, load
from pyrocko.model import load_stations
from pyrocko.guts import Object, String, Int, Float, Tuple, Bool
from pyrocko.gui import marker
from pyrocko.gf.seismosizer import Engine, Target
from pyrocko.gf.seismosizer import Engine, Target, LocalEngine
from pyrocko import orthodrome
from pyrocko import pile
from swarm import synthi
import logging
import random
import numpy as num
import os
import glob
......@@ -102,10 +103,10 @@ class DataGeneratorBase(Object):
class DataGenerator(DataGeneratorBase):
absolute = Bool.T(help='Use absolute amplitudes', default=False)
effective_deltat = Float.T(optional=True)
sample_length = Float.T(help='length [s] of data window')
fn_stations = String.T()
absolute = Bool.T(help='Use absolute amplitudes', default=False)
effective_deltat = Float.T(optional=True)
reference_target = Target.T(
default=Target(
codes=('', 'NKC', '', 'SHZ'),
......@@ -181,13 +182,13 @@ class DataGenerator(DataGeneratorBase):
class PileData(DataGenerator):
'''Data generator for locally saved data.'''
data_path = String.T()
data_format = String.T(default='mseed')
fn_markers = String.T()
deltat_want = Float.T(optional=True)
def setup(self):
self.data_pile = pile.make_pile(
self.data_path, fileformat=self.data_format)
......@@ -222,14 +223,21 @@ class PileData(DataGenerator):
'Different sampling rates in dataset. Preprocessing slow')
def preprocess(self, tr):
    '''Trace preprocessing.

    Ensures the trace's sampling interval matches ``self.deltat_want``
    (trace is modified in place).

    :param tr: pyrocko.trace.Trace object
    '''
    # No target sampling interval configured -> nothing to do
    # (deltat_want is an optional attribute).
    if self.deltat_want is None:
        return
    # Bug fix: original read ``tr.delta`` here, but the sampling-interval
    # attribute is ``tr.deltat`` (as used in the elif branch below).
    if tr.deltat - self.deltat_want > EPSILON:
        # Trace is coarser than wanted -> upsample.
        tr.resample(self.deltat_want)
    elif tr.deltat - self.deltat_want < -EPSILON:
        # Trace is finer than wanted -> downsample.
        tr.downsample_to(self.deltat_want)
def generate(self):
def generate(self, shuffle=False):
tr_len = self.n_samples_max * self.deltat_want
nslc_to_index = {nslc: idx for idx, nslc in enumerate(self.channels)}
if shuffle:
random.shuffle(self.markers)
for m in self.markers:
event = m.get_event()
if event is None:
......@@ -311,3 +319,13 @@ class OnTheFlyData(DataGenerator):
yield chunk, self.extract_labels(source)
@classmethod
def get_example(cls):
    '''Return an example instance wired to a local Green's function engine.

    NOTE(review): store paths and store id are hard-coded example values.
    '''
    engine = LocalEngine(
        use_config=True,
        store_superdirs=['/data/stores'],
        default_store_id='vogtland_001')
    return cls(
        fn_stations='stations.pf',
        gf_engine=engine,
        sample_length=10)
from .data import *
from .tf_util import *
from .optimize import Optimizer
import tensorflow as tf
from pyrocko import guts
from pyrocko.guts import Object, Float, Bool
from pyrocko.orthodrome import distance_accurate50m
from pyrocko.gf.seismosizer import LocalEngine
import logging
import shutil
......@@ -16,6 +15,7 @@ logger = logging.getLogger('pinky.model')
class Model(Object):
optimizer = Optimizer.T(optional=True)
data_generator = DataGeneratorBase.T()
dropout_rate = Float.T(optional=True)
batch_size = Int.T(default=10)
......@@ -27,6 +27,7 @@ class Model(Object):
optional=True, help='if set, shuffle examples at given buffer size.')
def __init__(self, tf_config=None, debug=False, **kwargs):
print(kwargs)
super().__init__(**kwargs)
if self.auto_clear and os.path.exists(self.summary_outdir):
......@@ -81,7 +82,7 @@ class Model(Object):
input = tf.layers.max_pooling2d(
input,
pool_size=(2, 2), # (height, width)
strides=(1, 2)
strides=(1, 1),
)
if self.debug:
......@@ -102,11 +103,12 @@ class Model(Object):
features = tf.reshape(features, [-1, n_channels, n_samples, 1])
# tf.summary.image('input', features)
conv = self.time_axis_cnn(features, n_filters, 1, name='conv1',
# conv = self.time_axis_cnn(features, n_filters, None, kernel_width=3, name='conv1',
conv = self.time_axis_cnn(features, n_filters, 1, kernel_width=1, name='conv1',
training=training)
conv = self.time_axis_cnn(conv, n_filters*2, 1, name='conv2',
conv = self.time_axis_cnn(conv, n_filters*2, 1, kernel_width=1, name='conv2',
training=training)
conv = self.time_axis_cnn(conv, n_filters*4, 2, name='conv3',
conv = self.time_axis_cnn(conv, n_filters*4, 2, kernel_width=1, name='conv3',
training=training)
fc = tf.contrib.layers.flatten(conv)
......@@ -137,10 +139,10 @@ class Model(Object):
# reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
# )
# tf.summary.scalar('lossalternative', lossalternative)
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.AdamOptimizer(
learning_rate=params.get('learning_rate', 1e-4))
# learning_rate=params.get('learning_rate', 1e-4))
learning_rate=params.get('learning_rate', 1e-4)[0])
train_op = optimizer.minimize(
loss=loss, global_step=tf.train.get_global_step())
tf.summary.scalar('loss', loss)
......@@ -162,6 +164,7 @@ class Model(Object):
)
def train(self, params=None):
print(params)
params = params or {}
with self.sess as default:
......@@ -172,6 +175,12 @@ class Model(Object):
result = est.evaluate(input_fn=self.generate_input, steps=1)
return result
def optimize(self):
    '''Run hyper-parameter optimization as configured by ``self.optimizer``.

    Exits the process if no optimizer is configured on this model.
    '''
    if self.optimizer is None:
        # Bug fix: original printed to stdout and called sys.exit() with no
        # argument, i.e. exit status 0 on an error path. Passing the message
        # to sys.exit() prints it to stderr and exits with status 1.
        sys.exit('No optimizer defined')
    self.optimizer.optimize(self)
def main():
import argparse
......@@ -180,12 +189,13 @@ def main():
description='')
parser.add_argument('--config')
parser.add_argument('--train', action='store_true')
parser.add_argument('--optimize', action='store_true')
parser.add_argument('--write-tfrecord-model', metavar='FILENAME',
help='write data_generator out to FILENAME')
parser.add_argument('--from-tfrecord', metavar='FILENAME',
help='read tfrecord')
parser.add_argument('--write')
parser.add_argument('--new-config', action='store_true')
parser.add_argument('--new-config')
parser.add_argument('--show-data', action='store_true')
parser.add_argument(
'--cpu', action='store_true', help='force CPU usage')
......@@ -208,7 +218,7 @@ def main():
if args.show_data:
from . import plot
plot.show_data(model)
plot.show_data(model, shuffle=True)
elif args.write_tfrecord_model:
import uuid
......@@ -227,21 +237,27 @@ def main():
model.data_generator = TFRecordData(fn_tfrecord=args.from_tfrecord)
elif args.new_config:
fn_config = 'model.config'
fn_config = args.new_config
if os.path.exists(fn_config):
print('file exists: %s' % fn_config)
sys.exit()
gf_engine = LocalEngine(
use_config=True,
store_superdirs=['/data/stores'],
default_store_id='vogtland_001')
data_generator = OnTheFlyData.get_example()
optimizer = Optimizer.get_example()
model = Model(
tf_config=tf_config,
data_generator=data_generator,
optimizer=optimizer)
data_generator = OnTheFlyData(fn_stations='stations.pf', gf_engine=gf_engine)
model = Model(tf_config=tf_config, data_generator=data_generator)
model.regularize()
model.dump(filename=fn_config)
print('created a fresh "%s"' % fn_config)
# model.dump(filename=fn_config)
print(model)
# print('created a fresh "%s"' % fn_config)
if args.train and args.optimize:
print('Can only use --train or --optimize')
if args.train:
model.train()
elif args.optimize:
model.optimize()
import skopt, gp_minimize
from skopt import gp_minimize
from skopt.space import Real, Categorical, Integer
from pyrocko.guts import Object, Int, Float, List
from pyrocko.guts import Object, Int, Float, List, Tuple, String
def to_skopt_real(x, name, prior):
    '''Convert a (low, high, default) tuple into a skopt ``Real`` dimension.

    :param x: tuple whose first two entries are the lower and upper bound
    :param name: dimension name passed to skopt
    :param prior: sampling prior, e.g. ``'log-uniform'``
    '''
    return Real(low=x[0], high=x[1], prior=prior, name=name)


class Optimizer(Object):
    '''Hyper-parameter optimizer wrapping scikit-optimize's ``gp_minimize``.'''

    # (low, high, default) — currently only the last entry is used (as the
    # starting point x0); the search bounds are hard-coded in __init__.
    learning_rate = Tuple.T(3, Float.T(), default=(1e-3, 1e-5, 1e-4))
    n_calls = Int.T(default=50)
    path_best = String.T(default='winner')

    def __init__(self, **kwargs):
        # Bug fix vs. earlier revision: kwargs must be unpacked for guts'
        # Object.__init__ (``super().__init__(kwargs)`` passed the dict
        # positionally). Leftover debug prints removed.
        super().__init__(**kwargs)
        self.model = None
        # Search space: one skopt dimension per tuned hyper-parameter.
        self.dimensions = [
            Real(low=1e-6, high=1e-2, prior='log-uniform',
                 name='learning_rate')
        ]

    def evaluate(self, *args):
        '''Adapter called by ``gp_minimize``: map the positional sample onto
        named hyper-parameters and return the resulting training loss.'''
        params = dict(zip(['learning_rate'], args))
        return self.model.train(params)['loss']

    def optimize(self, model):
        '''Run Bayesian optimization of *model*'s hyper-parameters.

        :param model: pinky ``Model`` instance providing a ``train`` method
        '''
        self.model = model
        default_parameters = [self.learning_rate[-1]]
        gp_minimize(
            func=self.evaluate,
            dimensions=self.dimensions,
            acq_func='EI',   # Expected Improvement
            n_calls=self.n_calls,
            x0=default_parameters,
        )

    def log_dir_name(self, learning_rate):
        '''Return the TensorBoard log-dir name for *learning_rate*.'''
        return "./logs/lr_{0:.0e}_layers".format(learning_rate)

    @classmethod
    def get_example(cls):
        '''Return an ``Optimizer`` with all-default configuration.'''
        return cls()
if __name__ == '__main__':
    # Smoke test: report the installed scikit-optimize version and show an
    # example Optimizer configuration.
    # Bug fix: the module imports ``from skopt import gp_minimize``, so the
    # bare name ``skopt`` was unbound here -> import it locally.
    import skopt
    print(skopt.__version__)
    print(Optimizer.get_example())
......@@ -19,7 +19,7 @@ def adjust(fig):
left=0.01, right=0.99, top=0.98, bottom=0.02, wspace=0.02, hspace=0.07)
def show_data(model):
def show_data(model, shuffle=False):
yscale = 3.
n = 9
n_rows = 3
......@@ -30,7 +30,8 @@ def show_data(model):
fig_w, axs_w = plt.subplots(math.ceil(n/n_rows), n_rows)
axs_w = flatten(axs_w)
for i, (chunk, labels) in enumerate(model.data_generator.generate()):
for i, (chunk, labels) in enumerate(
model.data_generator.generate(shuffle=shuffle)):
if i == n:
break
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment