Commit 6172e455 authored by Marius Kriegerowski's avatar Marius Kriegerowski

shuffle

parent 7b0b5943
......@@ -87,10 +87,6 @@ class DataGeneratorBase(Object):
self.generate_output_types,
output_shapes=self.output_shapes)
# def get_dataset(self):
# return tf.data.TFRecordDataset(filenames=[self.fn_tfrecord])
def write(self, directory):
'''Write data to TFRecordDataset using `self.writer`.'''
writer = tf.python_io.TFRecordWriter(directory)
......@@ -104,7 +100,6 @@ class DataGeneratorBase(Object):
writer.write(ex.SerializeToString())
class DataGenerator(DataGeneratorBase):
absolute = Bool.T(help='Use absolute amplitudes', default=False)
......@@ -119,6 +114,12 @@ class DataGenerator(DataGeneratorBase):
elevation=546))
noise = Noise.T(optional=True, help='Add noise to your feature chunks')
highpass = Float.T(optional=True)
lowpass = Float.T(optional=True)
highpass_order = Int.T(default=4, optional=True)
lowpass_order = Int.T(default=4, optional=True)
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.setup()
......@@ -126,12 +127,6 @@ class DataGenerator(DataGeneratorBase):
def setup(self):
pass
def get_dataset(self):
return tf.data.Dataset.from_generator(
self.generate,
self.generate_output_types,
output_shapes=self.output_shapes)
@property
def output_shapes(self):
return (self.tensor_shape, self.n_classes)
......@@ -177,7 +172,8 @@ class DataGenerator(DataGeneratorBase):
istop_array = istart_array + (data_len - 2* istart_trace)
ydata = tr.data[ \
istart_trace: min(data_len, self.n_samples_max-istart_array)+istart_trace]
istart_trace: min(data_len, self.n_samples_max-istart_array) \
+ istart_trace]
chunk[i, istart_array: istart_array+ydata.shape[0]] += ydata
chunk -= num.min(chunk)
......@@ -192,12 +188,14 @@ class PileData(DataGenerator):
def setup(self):
# self.classes = ['north_shift', 'east_shift', 'depth']
self.data_pile = pile.make_pile(self.data_path, fileformat=self.data_format)
self.data_pile = pile.make_pile(
self.data_path, fileformat=self.data_format)
if self.data_pile.is_empty():
sys.exit('Data pile is empty!')
self.deltat_want = self.deltat_want or min(self.data_pile.deltats.keys())
self.deltat_want = self.deltat_want or \
min(self.data_pile.deltats.keys())
self.n_samples_max = int(self.sample_length/self.deltat_want)
markers = marker.load_markers(self.fn_markers)
marker.associate_phases_to_events(markers)
......@@ -224,9 +222,9 @@ class PileData(DataGenerator):
'Different sampling rates in dataset. Preprocessing slow')
def preprocess(self, tr):
if tr.delta - self.deltat_want < -EPSILON:
if tr.delta - self.deltat_want > EPSILON:
tr.resample(self.deltat_want)
elif tr.deltat - self.deltat_want > EPSILON:
elif tr.deltat - self.deltat_want < -EPSILON:
tr.downsample_to(self.deltat_want)
def generate(self):
......
......@@ -16,12 +16,14 @@ logger = logging.getLogger('pinky.model')
class Model(Object):
data_generator = DataGeneratorBase.T()
dropout_rate = Float.T(default=0.1)
dropout_rate = Float.T(optional=True)
batch_size = Int.T(default=10)
outdir = String.T(default='/tmp/dnn-seis')
auto_clear = Bool.T(default=True)
summary_outdir= String.T(default='summary')
summary_nth_step = Int.T(default=1)
shuffle_size = Int.T(
optional=True, help='if set, shuffle examples at given buffer size.')
def __call__(self, tf_config=None):
......@@ -38,6 +40,8 @@ class Model(Object):
def generate_input(self):
dataset = self.data_generator.get_dataset()
dataset = dataset.batch(self.batch_size)
if self.shuffle_size:
dataset = dataset.shuffle(buffer_size=self.shuffle_size)
dataset = dataset.repeat()
dataset = dataset.prefetch(buffer_size=self.batch_size)
......@@ -61,7 +65,7 @@ class Model(Object):
# initializer = tf.truncated_normal_initializer(
initializer = tf.random_normal_initializer(
# mean=0.5, stddev=0.1)
mean=0., stddev=0.1)
mean=0.0, stddev=0.1)
input = tf.layers.conv2d(
inputs=input,
......@@ -76,8 +80,10 @@ class Model(Object):
# extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
input = tf.layers.max_pooling2d(
input,
pool_size=(1, 2), # pool_size (height, width)
pool_size=(2, 2), # pool_size (height, width)
strides=(1, 2)
# pool_size=(1, 2), # pool_size (height, width)
# strides=(1, 2)
)
tf.summary.image('post-%s' % name, tf.split(input, num_or_size_splits=n_filters, axis=-1)[0])
variable_summaries(input, name)
......@@ -104,9 +110,9 @@ class Model(Object):
fc = tf.layers.dense(fc, 512, activation=tf.nn.relu)
# fc = tf.layers.dense(fc, 1024, activation=tf.nn.relu)
fc = tf.layers.dropout(
fc, rate=self.dropout_rate,
training=training)
if self.dropout_rate:
fc = tf.layers.dropout(
fc, rate=self.dropout_rate, training=training)
predictions = tf.layers.dense(fc, self.data_generator.n_classes)
variable_summaries(predictions, 'predictions')
......@@ -115,8 +121,14 @@ class Model(Object):
# predictions = tf.Print(predictions, [predictions], "Predictions: ")
# vector length
loss = tf.sqrt(tf.reduce_sum((predictions - labels) ** 2))/ self.batch_size
loss = tf.Print(loss, [loss], "L2 error: ")
errors = predictions - labels
tf.summary.scalar('error_z_mean', tf.reduce_mean(errors[-1]))
tf.summary.scalar('error_lateral_mean',
tf.reduce_mean(tf.sqrt(tf.reduce_sum(errors[0: 1]**2,
keepdims=False))))
loss = tf.reduce_mean(tf.sqrt(tf.reduce_sum((predictions - labels) ** 2, axis=1,
keepdims=False)))
loss = tf.Print(loss, [loss], "Mean Euclidian Error [m]: ")
# lossalternative= tf.losses.mean_squared_error(
# labels,
# predictions,
......@@ -127,20 +139,11 @@ class Model(Object):
# )
# tf.summary.scalar('lossalternative', lossalternative)
accuracy = tf.metrics.accuracy(
labels=labels,
predictions=predictions,
name='acc_op')
metrics = {
'accuracy': accuracy,
}
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.AdamOptimizer(
learning_rate=params['learning_rate'])
train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
tf.summary.scalar('accuracy', accuracy[1])
# tf.summary.scalar('accuracy', accuracy[1])
tf.summary.scalar('loss', loss)
return tf.estimator.EstimatorSpec(
......@@ -150,8 +153,7 @@ class Model(Object):
training_hooks=[self.get_summary_hook()])
elif mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(
mode=mode, loss=loss, eval_metric_ops=metrics)
return tf.estimator.EstimatorSpec(mode=mode, loss=loss)
def get_summary_hook(self):
return tf.train.SummarySaverHook(
......@@ -208,7 +210,8 @@ def main():
plot.show_data(model)
elif args.write_tfrecord_model:
fn_tfrecord = 'testfn_.yaml'
import uuid
fn_tfrecord = 'dump_%s.tfrecord' % str(uuid.uuid4())
tfrecord_data_generator = DataGeneratorBase(fn_tfrecord=fn_tfrecord)
tfrecord_data_generator.tensor_shape = model.data_generator.tensor_shape
......@@ -231,7 +234,7 @@ def main():
gf_engine = LocalEngine(
use_config=True,
store_superdirs=['/data/stores'],
default_store_id='qplayground_total_4_mr_full')
default_store_id='vogtland_001')
data_generator = OnTheFlyData(fn_stations='stations.pf', gf_engine=gf_engine)
model = Model(data_generator=data_generator)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment