preparation_main.py 2.11 KB
Newer Older
Michael Rudolf's avatar
Michael Rudolf committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#!/usr/bin/env python3

'''
preparation_main.py

Script to run for data preparation.


__AUTHOR__: Michael Rudolf
__DATE__: 19-Feb-2019

'''

# Import the necessary modules
import importlib
import h5py
import numpy as np
import os
import shutil
import logging
import matplotlib.pyplot as plt

Michael Rudolf's avatar
Michael Rudolf committed
23
import modules.preparation as preparation
Michael Rudolf's avatar
Michael Rudolf committed
24 25 26

# Directories that hold the input HDF5 files, keyed by the machine the
# script is run on.
file_paths = {
    'home_office': 'C:/Users/Michael/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
    'lab': 'C:/Users/M.Rudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
    'lab2': 'G:/RST/VST-Paper_Data/python/py_GB300_SpringA/',
    'office': '~/home/mrudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/'
}

# Select the active location. expanduser() resolves a leading '~' (used by
# the 'office' entry), which os.listdir() would otherwise take literally;
# paths without '~' pass through unchanged.
base_path = os.path.expanduser(file_paths['lab2'])

# Collect every HDF5 file directly inside base_path.
# - '.h5' (with the dot) only matches the extension, not any name that
#   merely happens to end in the letters 'h5'.
# - os.path.join() does not rely on base_path ending with a separator.
# - sorted() makes the processing order reproducible, because os.listdir()
#   returns entries in arbitrary order.
file_list = [
    os.path.join(base_path, name)
    for name in sorted(os.listdir(base_path))
    if name.endswith('.h5')
]
Michael Rudolf's avatar
Michael Rudolf committed
34

35 36
# Fetch the root logger (no name given) and set its threshold to INFO.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)
Michael Rudolf's avatar
Michael Rudolf committed
37 38


39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
for h5_path in file_list:
    # Step 1: build the sets for this input file; create_sets hands the
    # newly created files back as a list.
    print('Creating sets...')
    all_sets = preparation.create_sets(h5_path)

    # Standard parameters for the subsets:
    #   window=30
    #   step_frac=1
    #   min_cycles=5
    #   min_win=10

    # Step 2: find out which sets have to be omitted; the call also
    # reports the duration of the longest cycle.
    print('Calculating which sets to omit...')
    omitted, longest_cycle = preparation.omit_sets(all_sets)

    # Step 3: keep only the sets whose position is not flagged as omitted.
    kept_sets = []
    for index, set_file in enumerate(all_sets):
        if index in omitted:
            continue
        kept_sets.append(set_file)

    # Step 4: create the subsets from the remaining sets.
    print('Creating subsets...')
    subset_dir = preparation.create_subsets(kept_sets, longest_cycle)

    # Step 5: visualize what was written to the subset directory.
    print('Visualizing the subsets...')
    preparation.visualize_subsets(subset_dir)

    # Step 6: RAM usage per feature, computed over all sets (including
    # the omitted ones) and reported in kB.
    print('Calculating RAM usage...')
    ram_bytes = preparation.ram_per_feature(all_sets)
    print('Done!')

    kept_count = len(kept_sets)
    total_count = len(all_sets)
    print('Created', kept_count, 'subsets from', total_count, 'sets.')

    ram_kilobytes = ram_bytes/1024
    print('Each feature will need %.2f kB of RAM!' % ram_kilobytes)