Commit 86f7e50d authored by Michael Rudolf

Data-Preparation update

 - Added functionality to split an experiment into sets.
 - Automatically omit small sets when they are too short.
 - Subsets are now created from the sets.
 - The RAM usage per feature is estimated using the specified window 
parameters and np.nbytes().
 - showcases the use of the preparation functions.
 - A documentation of the data preparation is available in the 
 - Added some hdf5 functionality for slicing and reading/writing 
dictionaries (partially from
parent ef0052fc
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
#!/usr/bin/env python3
"""
Script to run for data preparation.

__AUTHOR__: Michael Rudolf
__DATE__: 19-Feb-2019
"""
# Import the necessary modules
import importlib
import h5py
import numpy as np
import os
import shutil
import logging
import matplotlib.pyplot as plt
import preparation
# ---------------------------------------------------------------------------
# Driver script: split one experiment file into sets, drop the sets that
# `preparation.omit_sets` reports, create subsets from the remaining sets and
# print an estimate of the RAM needed per feature.
# ---------------------------------------------------------------------------

# The example file lives at the same location below the user's home directory
# on every machine; only the leading part of the path differs.
example_rel_path = ('ownCloud/DocStelle/GitRepos/shear-madness/'
                    '0-data-preparation/ExampleData/'
                    'b_5kPa_371-01-27-GB300.h5')

# List of file paths, depending on where I run the script
file_paths = {
    'home_office': 'C:/Users/Michael/' + example_rel_path,
    'lab': 'C:/Users/M.Rudolf/' + example_rel_path,
    'office': '~/home/mrudolf/' + example_rel_path,
}

# Select the machine here. The 'office' entry starts with '~'; expand it so
# that an absolute path is handed on (this is a no-op for the other entries).
file_path = os.path.expanduser(file_paths['lab'])

# Create Sets and get back the new files as list
print('Creating sets...')
set_file_list = preparation.create_sets(file_path)

# Standard parameters for the subsets
# NOTE(review): this heading was bare text in the original script and no
# parameter definitions follow it here -- confirm whether any are missing.

# Find out which sets have to be omitted (also gets duration of longest cycle)
print('Calculating which sets to omit...')
(omit_total, max_dur) = preparation.omit_sets(set_file_list)

# Filter out the omitted datasets; `omit_total` is used as a collection of
# positions into `set_file_list`.
set_list_new = [
    set_file
    for (index, set_file) in enumerate(set_file_list)
    if index not in omit_total
]

# Create subsets
print('Creating subsets...')
subset_dir_name = preparation.create_subsets(set_list_new, max_dur)

# Visualize
# NOTE(review): only the status message is printed; no visualization call is
# present in this script -- confirm whether one should follow.
print('Visualizing the subsets...')

# Calculate ram usage per feature
print('Calculating RAM usage...')
ram_usage = preparation.ram_per_feature(set_file_list)

# Final report. The value returned by `ram_per_feature` is divided by 1024
# before being printed with the unit kB.
print('Created', len(set_list_new),
      'subsets from', len(set_file_list), 'sets.')
print('Each feature will need %.2f kB of RAM!' % (ram_usage/1024))
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment