preparation_main.py 2.11 KB
Newer Older
Michael Rudolf's avatar
Michael Rudolf committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/usr/bin/env python3

'''
preparation_main.py

Script to run for data preparation.


__AUTHOR__: Michael Rudolf
__DATE__: 19-Feb-2019

'''

# Import the necessary modules
import importlib
import h5py
import numpy as np
import os
import shutil
import logging
import matplotlib.pyplot as plt

Michael Rudolf's avatar
Michael Rudolf committed
23
import modules.preparation as preparation
Michael Rudolf's avatar
Michael Rudolf committed
24
25
26

# Known data directories, keyed by the machine the script is run on.
# Every value ends with a trailing '/'.
# NOTE: the 'office' entry starts with '~', which os.listdir() does not
# expand on its own; such a path has to go through os.path.expanduser()
# before it is used.
file_paths = {
    'home_office': 'C:/Users/Michael/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
    'lab': 'C:/Users/M.Rudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
    'lab2': 'G:/RST/VST-Paper_Data/python/py_GB300_SpringA/',
    'office': '~/home/mrudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
}
Michael Rudolf's avatar
Michael Rudolf committed
32
# Select the data directory for the current machine. expanduser() resolves
# a leading '~' (as used by the 'office' entry); paths without one pass
# through unchanged.
base_path = os.path.expanduser(file_paths['lab2'])

# Collect the HDF5 files located directly inside base_path.
# - The suffix test includes the dot so that only names with a real '.h5'
#   extension are picked up.
# - os.listdir() returns entries in arbitrary order, so the result is
#   sorted to make the processing order reproducible between runs.
file_list = sorted(
    os.path.join(base_path, name)
    for name in os.listdir(base_path)
    if name.endswith('.h5')
)
Michael Rudolf's avatar
Michael Rudolf committed
34

35
36
# Fetch the root logger (name=None) and lower its threshold to INFO so that
# informational records are no longer rejected at the logger level.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)
Michael Rudolf's avatar
Michael Rudolf committed
37
38


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Run the complete preparation pipeline once for every input file:
# sets -> omission check -> subsets -> visualization -> RAM estimate.
for file_path in file_list:
    # Step 1: build the sets; the helper hands back the new files as a list.
    print('Creating sets...')
    set_files = preparation.create_sets(file_path)

    # Standard parameters for the subsets:
    #   window=30
    #   step_frac=1
    #   min_cycles=5
    #   min_win=10

    # Step 2: determine which sets must be left out. The second return
    # value is the duration of the longest cycle.
    print('Calculating which sets to omit...')
    omitted_indices, longest_cycle = preparation.omit_sets(set_files)

    # Step 3: keep only the sets whose position is not flagged for omission.
    kept_sets = []
    for position, set_file in enumerate(set_files):
        if position in omitted_indices:
            continue
        kept_sets.append(set_file)

    # Step 4: build the subsets from the sets that were kept.
    print('Creating subsets...')
    subset_dir = preparation.create_subsets(kept_sets, longest_cycle)

    # Step 5: visualize what was written for the subsets.
    print('Visualizing the subsets...')
    preparation.visualize_subsets(subset_dir)

    # Step 6: estimate the RAM needed per feature (computed on all sets,
    # including the omitted ones) and print a summary for this file.
    print('Calculating RAM usage...')
    feature_ram = preparation.ram_per_feature(set_files)
    print('Done!')
    print('Created', len(kept_sets),
          'subsets from', len(set_files), 'sets.')
    feature_ram_kb = feature_ram/1024
    print('Each feature will need %.2f kB of RAM!' % feature_ram_kb)