Commit 85558ec5 authored by Marius Kriegerowski

evaluation separation

parent 4ff64f2b
......@@ -63,10 +63,12 @@ class DataGeneratorBase(Object):
else:
self._shape = v
def generate(self):
record_iterator = tf.python_io.tf_record_iterator(
path=self.fn_tfrecord)
@property
def output_shapes(self):
    # Shapes handed to tf.data.Dataset.from_generator (see get_dataset):
    # (shape of one data tensor, number of label classes).
    return (self.tensor_shape, self.n_classes)
def unpack_examples(self, record_iterator):
    '''Parse examples stored in TFRecordData to `tf.train.Example`'''
    for string_record in record_iterator:
        example = tf.train.Example()
        # Deserialize the raw record bytes into the Example proto.
        example.ParseFromString(string_record)
......@@ -78,9 +80,10 @@ class DataGeneratorBase(Object):
label = num.fromstring(label, dtype=num.float32)
yield chunk, label
@property
def output_shapes(self):
    # (data tensor shape, number of classes) — consumed as the
    # `output_shapes` argument of tf.data.Dataset.from_generator.
    return (self.tensor_shape, self.n_classes)
def generate(self):
    '''Read `self.fn_tfrecord` and yield parsed (chunk, label) examples.'''
    records = tf.python_io.tf_record_iterator(path=self.fn_tfrecord)
    return self.unpack_examples(records)
def get_dataset(self):
    '''Build a tf.data.Dataset backed by `self.generate`.'''
    return tf.data.Dataset.from_generator(
......@@ -88,16 +91,20 @@ class DataGeneratorBase(Object):
self.generate_output_types,
output_shapes=self.output_shapes)
def write(self, directory):
    # NOTE(review): truncated definition — looks like leftover diff residue;
    # a fuller `write` implementation appears below. Confirm which is current.
    '''Write data to TFRecordDataset using `self.writer`.'''
    writer = tf.python_io.TFRecordWriter(directory)
def pack_examples(self, generator):
    '''Serialize examples to `tf.train.Example` protos.

    Yields one Example per (ydata, label) pair, with both arrays stored
    as raw bytes features ('data' and 'label').

    NOTE(review): the `generator` argument is currently ignored; examples
    are always drawn from `self.generate()` — confirm against callers.
    '''
    # Fix: removed a dangling `ex = tf.train.Example(` line (stale merge
    # residue) that made this block syntactically invalid.
    for ydata, label in self.generate():
        yield tf.train.Example(
            features=tf.train.Features(
                feature={
                    'data': _BytesFeature(ydata.tobytes()),
                    # Labels are normalized to float32 before serialization.
                    'label': _BytesFeature(
                        num.array(label, dtype=num.float32).tobytes()),
                }))
def write(self, directory):
    '''Write example data to TFRecordDataset using `self.writer`.

    :param directory: path of the TFRecord file to create.
    '''
    writer = tf.python_io.TFRecordWriter(directory)
    try:
        # Fix: pass an example generator, not the writer object, to
        # pack_examples (its parameter is an example source).
        for ex in self.pack_examples(self.generate()):
            writer.write(ex.SerializeToString())
    finally:
        # Fix: close the writer so buffered records are flushed to disk
        # even if serialization raises.
        writer.close()
......@@ -187,6 +194,7 @@ class PileData(DataGenerator):
data_format = String.T(default='mseed')
fn_markers = String.T()
deltat_want = Float.T(optional=True)
shuffle = Bool.T(default=False)
def setup(self):
self.data_pile = pile.make_pile(
......@@ -198,6 +206,8 @@ class PileData(DataGenerator):
self.deltat_want = self.deltat_want or \
min(self.data_pile.deltats.keys())
self.n_samples_max = int(self.sample_length/self.deltat_want)
logging.debug('loading markers')
markers = marker.load_markers(self.fn_markers)
marker.associate_phases_to_events(markers)
......@@ -212,7 +222,11 @@ class PileData(DataGenerator):
assert(len(markers_by_nsl) == 1)
self.markers = list(markers_by_nsl.values())[0]
self.markers.sort(key=lambda x: x.tmin)
if self.shuffle:
random.shuffle(self.markers)
else:
self.markers.sort(key=lambda x: x.tmin)
self.channels = list(self.data_pile.nslc_ids.keys())
self.tensor_shape = (len(self.channels), self.n_samples_max)
......@@ -233,11 +247,9 @@ class PileData(DataGenerator):
elif tr.deltat - self.deltat_want < -EPSILON:
tr.downsample_to(self.deltat_want)
def generate(self):
    '''Iterate over markers and yield (chunk, label) training examples.'''
    # Fix: removed the stale old signature `def generate(self, shuffle=False)`
    # and the in-method shuffle — shuffling now happens once in `setup`
    # (driven by the `shuffle` attribute), per this commit's diff.
    tr_len = self.n_samples_max * self.deltat_want
    nslc_to_index = {nslc: idx for idx, nslc in enumerate(self.channels)}
for m in self.markers:
event = m.get_event()
if event is None:
......@@ -246,14 +258,14 @@ class PileData(DataGenerator):
for trs in self.data_pile.chopper(
tmin=m.tmin, tmax=m.tmin+tr_len, keep_current_files_open=True):
_trs = []
for tr in trs:
tr.data = tr.ydata.astype(num.float)
_trs.append(tr)
trs = _trs
if not len(trs):
logging.debug('No data at tmin=%s' % m.tmin)
continue
for tr in trs:
tr.data = tr.ydata.astype(num.float)
chunk = self.get_raw_data_chunk()
indices = [nslc_to_index[tr.nslc_id] for tr in trs]
self.fit_data_into_chunk(trs, chunk, indices=indices, tref=m.tmin)
......@@ -264,7 +276,6 @@ class PileData(DataGenerator):
yield chunk, self.extract_labels(event)
class GFSwarmData(DataGenerator):
swarm = source_region.Swarm.T()
n_sources = Int.T(default=100)
......
......@@ -14,11 +14,11 @@ import shutil
logger = logging.getLogger('pinky.model')
class Model(Object):
hyperparameter_optimizer = Optimizer.T(optional=True)
data_generator = DataGeneratorBase.T()
evaluation_data_generator = DataGeneratorBase.T(optional=True)
dropout_rate = Float.T(optional=True)
batch_size = Int.T(default=10)
n_epochs = Int.T(default=1)
......@@ -46,9 +46,7 @@ class Model(Object):
mean=0.0, stddev=0.1)
def generate_input(self):
    ''' Generates data and labels. '''
    # Fix: removed the duplicated pre-edit docstring (merge residue left
    # both old and new docstring lines in place).
    dataset = self.data_generator.get_dataset()
    dataset = dataset.batch(self.batch_size)
if self.shuffle_size:
......@@ -58,6 +56,12 @@ class Model(Object):
return dataset.make_one_shot_iterator().get_next()
def generate_evaluation_input(self):
    ''' Generates evaluation data and labels. '''
    # BUG fix: the dataset returned by get_dataset() was discarded, so
    # `dataset` on the next line was undefined (NameError at runtime).
    dataset = self.evaluation_data_generator.get_dataset()
    dataset = dataset.batch(self.batch_size)
    return dataset.make_one_shot_iterator().get_next()
def time_axis_cnn(self, input, n_filters, cross_channel_kernel=None,
kernel_width=1, name=None, training=False):
'''
......@@ -108,22 +112,20 @@ class Model(Object):
n_filters = params.get('base_capacity', 32)
kernel_width = params.get('kernel_width', 2)
kernel_height = params.get('kernel_height', 2)
kernel_width_factor = params.get('kernel_width_factor', 1)
self.activation = params.get('activation', tf.nn.relu)
n_channels, n_samples = self.data_generator.tensor_shape
input = tf.reshape(features, [-1, n_channels, n_samples, 1])
# tf.summary.image('input', features)
# if self.debug:
# tf.summary.image('input', features)
for ilayer in range(params.get('n_layers', 3)):
input = self.time_axis_cnn(input, n_filters * (1 + ilayer), 1,
input = self.time_axis_cnn(input, n_filters * (1 + ilayer),
cross_channel_kernel=kernel_height,
kernel_width=int(kernel_width + ilayer*kernel_width_factor),
name='conv_%s' % ilayer, training=training)
# conv = self.time_axis_cnn(conv, n_filters*2, 1, kernel_width=2, name='conv2',
# training=training)
# conv = self.time_axis_cnn(conv, n_filters*4, 2, kernel_width=3, name='conv3',
# training=training)
# conv = self.time_axis_cnn(features, n_filters, None, kernel_width=3, name='conv1',
# training=training)
fc = tf.contrib.layers.flatten(input)
fc = tf.layers.dense(fc, params.get('n_filters_dense', 512),
......@@ -187,8 +189,20 @@ class Model(Object):
model_fn=self.model, model_dir=self.outdir, params=params)
est.train(input_fn=self.generate_input)
result = est.evaluate(input_fn=self.generate_input, steps=1)
return result
def evaluate(self, params=None):
    '''Evaluate the trained model on the evaluation data generator.

    :param params: optional dict of model hyperparameters, forwarded to
        the Estimator constructor.
    :returns: the estimator's evaluation result dict, or None when no
        evaluation data generator is configured.
    '''
    logging.info('====== start evaluation')
    params = params or {}
    if self.evaluation_data_generator is None:
        # logging.warn is deprecated; use warning().
        logging.warning(
            'No evaluation data generator set! Can\'t evaluate after training')
        return
    with self.sess as default:
        est = tf.estimator.Estimator(
            model_fn=self.model, model_dir=self.outdir, params=params)
        # BUG fix: Estimator.evaluate() accepts no `params` kwarg (that
        # raised a TypeError); params are passed to the constructor above.
        return est.evaluate(
            input_fn=self.generate_evaluation_input, steps=1)
def optimize(self):
if self.hyperparameter_optimizer is None:
......@@ -220,6 +234,7 @@ def main():
if args.debug:
logger.setLevel(logging.DEBUG)
logger.debug('Debug level active')
if args.config:
model = guts.load(filename=args.config)
......@@ -263,12 +278,14 @@ def main():
hyperparameter_optimizer=optimizer)
model.regularize()
print(model)
if args.train and args.optimize:
print('Can only use --train or --optimize')
print('Can only use either --train or --optimize')
sys.exit()
if args.train:
model.train()
model.evaluate()
elif args.optimize:
model.optimize()
import matplotlib as mpl
mpl.use('PDF')
# import matplotlib as mpl
# mpl.use('PDF')
import os
import tensorflow as tf
from .util import delete_if_exists
from skopt import gp_minimize
from skopt import dump as dump_result
......
......@@ -29,9 +29,9 @@ def show_data(model, shuffle=False):
fig_w, axs_w = plt.subplots(math.ceil(n/n_rows), n_rows)
axs_w = flatten(axs_w)
model.data_generator.shuffle = shuffle
for i, (chunk, labels) in enumerate(
model.data_generator.generate(shuffle=shuffle)):
model.data_generator.generate()):
if i == n:
break
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment