Commit 86f7e50d authored by Michael Rudolf

Data-Preparation update

 - Added functionality to split an experiment into sets.
 - Automatically omit small sets when they are too short.
 - Subsets are now created from the sets.
 - The RAM usage per feature is estimated using the specified window 
parameters and np.nbytes().
 - preparation_main.py showcases the use of the preparation functions.
 - Documentation of the data preparation is available in the 
Jupyter notebook.
 - Added some hdf5 functionality for slicing and reading/writing 
dictionaries (partially from 
https://gitext.gfz-potsdam.de/analab-code/rst-stick-slipy).
parent ef0052fc
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
#!/usr/bin/env python3
'''
preparation_main.py
Script to run for data preparation.
__AUTHOR__: Michael Rudolf
__DATE__: 19-Feb-2019
'''
# Import the necessary modules
import importlib
import h5py
import numpy as np
import os
import shutil
import logging
import matplotlib.pyplot as plt
import preparation
# Candidate locations of the example HDF5 file, keyed by the machine the
# script is run on.
file_paths = {
    'home_office': 'C:/Users/Michael/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/b_5kPa_371-01-27-GB300.h5',
    'lab': 'C:/Users/M.Rudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/b_5kPa_371-01-27-GB300.h5',
    'office': '~/home/mrudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/b_5kPa_371-01-27-GB300.h5'
}
# Select the machine here. expanduser() resolves a leading '~' (as used by
# the 'office' entry) to the user's home directory; paths without a leading
# '~' are returned unchanged.
file_path = os.path.expanduser(file_paths['lab'])

# Create sets and get back the new files as a list.
print('Creating sets...')
set_file_list = preparation.create_sets(file_path)

# Standard parameters for the subsets:
#   window=30
#   step_frac=1
#   min_cycles=5
#   min_win=10
# NOTE(review): presumably these are the defaults used inside the
# `preparation` module -- they are not passed explicitly below; confirm.

# Find out which sets have to be omitted (also gets the duration of the
# longest cycle).
print('Calculating which sets to omit...')
omit_total, max_dur = preparation.omit_sets(set_file_list)

# Keep only the sets whose index is not listed in omit_total.
set_list_new = [
    set_file
    for (index, set_file) in enumerate(set_file_list)
    if index not in omit_total
]

# Create subsets from the remaining sets.
print('Creating subsets...')
subset_dir_name = preparation.create_subsets(set_list_new, max_dur)

# Visualize the subsets found in the returned directory.
print('Visualizing the subsets...')
preparation.visualize_subsets(subset_dir_name)

# Calculate RAM usage per feature.
# NOTE(review): this is computed from the unfiltered set_file_list, not from
# set_list_new -- confirm that this is intended.
print('Calculating RAM usage...')
ram_usage = preparation.ram_per_feature(set_file_list)

print('Done!')
# NOTE(review): the first number is the count of retained sets
# (len(set_list_new)), which the message labels as "subsets" -- confirm.
print('Created', len(set_list_new),
      'subsets from', len(set_file_list), 'sets.')
# ram_usage is divided by 1024 to report kB (assumes ram_per_feature returns
# bytes -- TODO confirm).
print('Each feature will need %.2f kB of RAM!' % (ram_usage/1024))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment