Commit 6d7c75b2 authored by Marius Kriegerowski

TFRecord-io

parent c3ae3c00
......@@ -42,14 +42,75 @@ class WhiteNoise(Noise):
return num.random.random((n_channels, n_samples)).astype(num.float32) * self.level
class DataGenerator(Object):
class DataGeneratorBase(Object):
    '''Generator of (chunk, label) examples backed by a TFRecord file.

    `generate` reads serialized `tf.train.Example` records from
    `fn_tfrecord`; `write` serializes whatever `generate` yields into a
    TFRecord file. Subclasses may override `generate` to produce examples
    from another source and reuse `write` / `get_dataset` unchanged.
    '''

    _shape = Tuple.T(2, Int.T(), optional=True)
    fn_tfrecord = String.T(optional=True)
    n_classes = Int.T(default=3)

    @property
    def tensor_shape(self):
        '''Shape used to reshape each data chunk (see `generate`).'''
        return self._shape

    @tensor_shape.setter
    def tensor_shape(self, v):
        # Nothing to do when the shape is unchanged. A property setter's
        # return value is discarded, so none is returned here.
        if v == self._shape:
            return

        self._shape = v

    @property
    def generate_output_types(self):
        '''TensorFlow dtypes of the (chunk, label) pairs that are yielded.'''
        return tf.float32, tf.float32

    @property
    def output_shapes(self):
        '''Shapes of the (chunk, label) pairs as passed to `tf.data`.'''
        return (self.tensor_shape, self.n_classes)

    def generate(self):
        '''Yield (chunk, label) pairs read from `self.fn_tfrecord`.

        Each record is parsed as a `tf.train.Example` whose `data` and
        `label` features hold raw float32 bytes. The chunk is reshaped to
        `self.tensor_shape`, the label is returned as a flat array.
        '''
        record_iterator = tf.python_io.tf_record_iterator(
            path=self.fn_tfrecord)

        for string_record in record_iterator:
            example = tf.train.Example()
            example.ParseFromString(string_record)
            features = example.features.feature
            raw_chunk = features['data'].bytes_list.value[0]
            raw_label = features['label'].bytes_list.value[0]

            # `frombuffer` replaces the deprecated binary mode of
            # `fromstring`. It returns a read-only view, hence the copy to
            # keep the yielded arrays writable as before.
            chunk = num.frombuffer(raw_chunk, dtype=num.float32).copy()
            chunk = chunk.reshape(self.tensor_shape)
            label = num.frombuffer(raw_label, dtype=num.float32).copy()
            yield chunk, label

    def get_dataset(self):
        '''Return a `tf.data.Dataset` wrapping `self.generate`.'''
        return tf.data.Dataset.from_generator(
            self.generate,
            self.generate_output_types,
            output_shapes=self.output_shapes)

    # def get_dataset(self):
    #     return tf.data.TFRecordDataset(filenames=[self.fn_tfrecord])

    def write(self, directory):
        '''Write all generated examples to a TFRecord file.

        :param directory: path of the TFRecord file to create (despite its
            name, this is handed to `TFRecordWriter` as the output path)
        '''
        # The context manager closes the writer so that buffered records
        # are flushed to disk even if `generate` raises.
        with tf.python_io.TFRecordWriter(directory) as writer:
            for ydata, label in self.generate():
                label_bytes = num.array(label, dtype=num.float32).tobytes()
                ex = tf.train.Example(
                    features=tf.train.Features(
                        feature={
                            'data': _BytesFeature(ydata.tobytes()),
                            'label': _BytesFeature(label_bytes),
                        }))
                writer.write(ex.SerializeToString())
class DataGenerator(DataGeneratorBase):
_shape = None
fn_stations = String.T()
need_same_dimensions = True # bin mir nicht sicher, ob das sein muss...
absolute = Bool.T(help='Use absolute amplitudes', default=False)
effective_deltat = Float.T(optional=True)
sample_length = Float.T(help='length [s] of data window')
fn_stations = String.T()
reference_target = Target.T(
default=Target(
codes=('', 'NKC', '', 'SHZ'),
......@@ -58,15 +119,23 @@ class DataGenerator(Object):
elevation=546))
noise = Noise.T(optional=True, help='Add noise to your feature chunks')
def __init__(self, *args, **kwargs):
super(DataGenerator, self).__init__(*args, **kwargs)
self.classes = ['north_shift', 'east_shift', 'depth']
self.n_classes = len(self.classes)
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.setup()
def setup(self):
pass
def get_dataset(self):
return tf.data.Dataset.from_generator(
self.generate,
self.generate_output_types,
output_shapes=self.output_shapes)
@property
def output_shapes(self):
return (self.tensor_shape, self.n_classes)
def extract_labels(self, source):
n, e = orthodrome.latlon_to_ne(
self.reference_target.lat, self.reference_target.lon,
......@@ -79,8 +148,7 @@ class DataGenerator(Object):
When working with noisy data, replace this function.
'''
stencil = num.zeros(self.tensor_shape, dtype=num.float32)
return stencil
return num.zeros(self.tensor_shape, dtype=num.float32)
def attach_graph(self, dataset, shape):
'''
......@@ -115,36 +183,6 @@ class DataGenerator(Object):
chunk -= num.min(chunk)
chunk /= num.max(chunk)
@property
def tensor_shape(self):
return self._shape
@tensor_shape.setter
def tensor_shape(self, v):
if v == self._shape:
return self._shape
elif self._shape is not None and self.need_same_dimensions:
raise Exception("encountered differing tensor shapes %s, %s" % (
v, self._shape))
else:
self._shape = v
@property
def generate_output_types(self):
return tf.float32, tf.float32
def write(self, directory):
'''Write data to TFRecordDataset using `self.writer`.'''
writer = tf.python_io.TFRecordWriter(directory)
for ydata, label in self.generate():
ex = tf.train.Example(
features=tf.train.Features(
feature={
'data': _BytesFeature(ydata.tobytes()),
'label': _BytesFeature(num.array(label).tobytes()),
}))
writer.write(ex.SerializeToString())
class PileData(DataGenerator):
data_path = String.T()
......@@ -154,7 +192,7 @@ class PileData(DataGenerator):
def setup(self):
self.classes = ['north_shift', 'east_shift', 'depth']
# self.classes = ['north_shift', 'east_shift', 'depth']
self.data_pile = pile.make_pile(self.data_path, fileformat=self.data_format)
if self.data_pile.is_empty():
sys.exit('Data pile is empty!')
......@@ -220,19 +258,6 @@ class PileData(DataGenerator):
yield chunk, self.extract_labels(event)
class TFRecordData(DataGenerator):
# NOT TESTED YET
def __init__(self, input_fn, *args, **kwargs):
super(TFRecordData, self).__init__(*args, **kwargs)
self.input_fn = input_fn
def setup(self):
pass
def generate(self):
return tf.python_io.tf_record_iterator(self.input_fn)
class OnTheFlyData(DataGenerator):
gf_engine = Engine.T()
......
......@@ -15,7 +15,7 @@ logger = logging.getLogger('pinky.model')
class Model(Object):
data_generator = DataGenerator.T()
data_generator = DataGeneratorBase.T()
dropout_rate = Float.T(default=0.1)
batch_size = Int.T(default=10)
outdir = String.T(default='/tmp/dnn-seis')
......@@ -36,25 +36,15 @@ class Model(Object):
self.train()
def generate_input(self):
shape = (
self.data_generator.tensor_shape,
self.data_generator.n_classes)
dataset = tf.data.Dataset.from_generator(
self.data_generator.generate,
self.data_generator.generate_output_types,
output_shapes=shape)
dataset = self.data_generator.attach_graph(dataset, shape)
dataset = self.data_generator.get_dataset()
dataset = dataset.batch(self.batch_size)
dataset = dataset.repeat()
dataset = dataset.prefetch(buffer_size=self.batch_size)
return dataset.make_one_shot_iterator().get_next()
def time_axis_cnn(self, input, n_filters, cross_channel_kernel=1, name=None,
training=False):
def time_axis_cnn(self, input, n_filters, cross_channel_kernel=None,
kernel_width=1, name=None, training=False):
'''
CNN along horizontal axis
......@@ -64,15 +54,20 @@ class Model(Object):
'''
_, n_channels, n_samples, _ = input.shape
if cross_channel_kernel is None:
cross_channel_kernel = n_channels
with tf.variable_scope('conv_layer%s' %name):
# initializer = tf.truncated_normal_initializer(
initializer = tf.random_normal_initializer(
mean=0.5, stddev=0.1)
# mean=0.5, stddev=0.1)
mean=0., stddev=0.1)
input = tf.layers.conv2d(
inputs=input,
filters=n_filters, # dimensionality of output space *N of filters
kernel_size=(cross_channel_kernel, n_channels), # use identity (1) along channels
# kernel_size=(n_channels, kernel_width), # use identity (1) along channels
kernel_size=(cross_channel_kernel, kernel_width), # use identity (1) along channels
activation=tf.nn.relu,
bias_initializer=initializer,
name=name)
......@@ -105,30 +100,32 @@ class Model(Object):
conv = self.time_axis_cnn(conv, n_filters*4, 2, name='conv3',
training=training)
fc1 = tf.contrib.layers.flatten(conv)
fc1 = tf.layers.dense(fc1, 512, activation=tf.nn.relu)
fc = tf.contrib.layers.flatten(conv)
fc = tf.layers.dense(fc, 512, activation=tf.nn.relu)
# fc = tf.layers.dense(fc, 1024, activation=tf.nn.relu)
fc1 = tf.layers.dropout(
fc1, rate=self.dropout_rate,
fc = tf.layers.dropout(
fc, rate=self.dropout_rate,
training=training)
predictions = tf.layers.dense(fc1, self.data_generator.n_classes)
predictions = tf.layers.dense(fc, self.data_generator.n_classes)
variable_summaries(predictions, 'predictions')
labels = tf.Print(labels, [labels], "Labels: ")
predictions = tf.Print(predictions, [predictions], "Predictions: ")
# labels = tf.Print(labels, [labels], "Labels: ")
# predictions = tf.Print(predictions, [predictions], "Predictions: ")
# vector length
loss = tf.sqrt(tf.reduce_sum((predictions - labels) ** 2))/ self.batch_size
lossalternative= tf.losses.mean_squared_error(
labels,
predictions,
weights=1.0,
scope=None,
loss_collection=tf.GraphKeys.LOSSES,
reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
)
tf.summary.scalar('lossalternative', lossalternative)
loss = tf.Print(loss, [loss], "L2 error: ")
# lossalternative= tf.losses.mean_squared_error(
# labels,
# predictions,
# weights=1.0,
# scope=None,
# loss_collection=tf.GraphKeys.LOSSES,
# reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
# )
# tf.summary.scalar('lossalternative', lossalternative)
accuracy = tf.metrics.accuracy(
labels=labels,
......@@ -180,7 +177,7 @@ def main():
description='')
parser.add_argument('--config')
parser.add_argument('--train', action='store_true')
parser.add_argument('--to-tfrecord', metavar='FILENAME',
parser.add_argument('--write-tfrecord-model', metavar='FILENAME',
help='write data_generator out to FILENAME')
parser.add_argument('--from-tfrecord', metavar='FILENAME',
help='read tfrecord')
......@@ -200,27 +197,32 @@ def main():
if args.config:
model = guts.load(filename=args.config)
tf_config = None
if args.cpu:
tf_config = tf.ConfigProto(
device_count = {'GPU': 0}
)
if args.show_data:
from . import plot
plot.show_data(model)
if args.train:
tf_config = None
if args.cpu:
tf_config = tf.ConfigProto(
device_count = {'GPU': 0}
)
model(tf_config)
elif args.write_tfrecord_model:
fn_tfrecord = 'testfn_.yaml'
elif args.to_tfrecord:
model = guts.load(filename=args.config)
model.data_generator.write(args.to_tfrecord)
tfrecord_data_generator = DataGeneratorBase(fn_tfrecord=fn_tfrecord)
tfrecord_data_generator.tensor_shape = model.data_generator.tensor_shape
model.data_generator.write(fn_tfrecord)
model.data_generator = tfrecord_data_generator
model.dump(filename=args.write_tfrecord_model)
logger.info('Wrote new model file: %s' % args.write_tfrecord_model)
elif args.from_tfrecord:
...
logger.info('Reading data from %s' % args.from_tfrecord)
model.data_generator = TFRecordData(fn_tfrecord=args.from_tfrecord)
elif args.new_config:
fn_config = 'model.config'
if os.path.exists(fn_config):
print('file exists: %s' % fn_config)
......@@ -236,3 +238,6 @@ def main():
model.regularize()
model.dump(filename=fn_config)
print('created a fresh "%s"' % fn_config)
if args.train:
model(tf_config)
# Return the first three entries of ``tr.nslc_id``
# (presumably the network, station and location codes -- TODO confirm).
def nsl(tr):
return tr.nslc_id[:3]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment