#!/usr/bin/env python3

'''
utils.py

Contains helper functions for various purposes.

__AUTHOR__: Michael Rudolf
__DATE__: 25-Mar-2019
'''

# Python Modules
import os

from ete3 import Tree
import h5py
import numpy as np


def show_h5_contents(cur_file):
    ''' Displays the contents of an HDF5 file as an ASCII tree '''
    with h5py.File(cur_file, 'r') as exp:
        tree_string = extract_keys_asTree(exp)
        tree = Tree(tree_string + ';', format=1)
    print(tree.get_ascii(show_internal=True))


def extract_keys_asTree(f):
    ''' Generates a Newick string for use with the ete3 Tree object '''
    tree_string = '('
    for key in f.keys():
        add = ''

        if isinstance(f[key], h5py.Group):
            # Recurse into subgroups; the group name itself is appended below.
            tree_string += extract_keys_asTree(f[key])
        else:
            typ = str(f[key].dtype)
            # Replace ', ' with 'x' so that commas in multi-dimensional
            # shapes do not interfere with the Newick string built here.
            shp = str(f[key].shape).replace(', ', 'x')
            if shp.endswith('()') or shp.endswith(',)'):
                # Pad scalar '()' and 1-D '(n,)' shapes with a trailing zero,
                # e.g. '()' -> '(0)' and '(500,)' -> '(500x0)'.
                shp = shp[0:-1] + '0)'
                shp = shp.replace(',', 'x')
            add = '(' + shp + '--->' + typ + ')'
        tree_string += add + '--' + key + ','
    # Drop the trailing comma and close this (sub)tree.
    tree_string = tree_string[0:-1] + ')'
    return tree_string
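
# For illustration (a sketch, not part of the original module): a file with a
# scalar 'Fs' dataset and a 'data' group holding a 1-D 'shear' dataset of
# length 500 would yield a string along the lines of
#
#   '(((0)--->float64)--Fs,(((500x0)--->float64)--shear)--data)'
#
# which show_h5_contents() above hands to ete3 (with a trailing ';') to
# render as an ASCII tree.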


def hdf5group_to_dict(h5group):
    '''
    Returns a dictionary with the full contents of each dataset in the
    h5group. Assumes the group contains only datasets, not subgroups.
    '''
    out_dict = dict()
    for dset in h5group.keys():
        out_dict[dset] = h5group[dset][()]
    return out_dict


def dict_to_hdf5(h5_file, dicty):
    '''
    Recursively saves a dictionary into an HDF5 file or group. Nested
    dictionaries become groups. If the data is a NumPy array, it is
    automatically gzip-compressed and chunked using h5py's default settings.
    '''
    for (name, content) in dicty.items():
        if isinstance(content, dict):
            grp = h5_file.create_group(name)
            dict_to_hdf5(grp, content)
        elif isinstance(content, np.ndarray):
            h5_file.create_dataset(name, data=content, compression='gzip')
        else:
            h5_file.create_dataset(name, data=content)
    return h5_file
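
# Hedged usage sketch for the two helpers above ('experiment.h5' is an
# illustrative file name, not something this module creates):
#
#   data = {'Fs': 100.0, 'data': {'shear': np.arange(500.0)}}
#   with h5py.File('experiment.h5', 'w') as f:
#       dict_to_hdf5(f, data)
#   with h5py.File('experiment.h5', 'r') as f:
#       restored = hdf5group_to_dict(f['data'])  # {'shear': array([...])}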


def slice_hdf5(file_path, slice_set, set_dir_name, fname_set):
    '''
    Slices the data in an HDF5 file and saves it as a new file in a folder.

    Arguments:
        file_path -- the path to the input HDF5 file
        slice_set -- a slice() object (with explicit start and stop) which
                     defines where to cut the data
        set_dir_name -- the folder where the sliced file is written
        fname_set -- the name of the output HDF5 file
    '''
    # Read out the actual data from the file; the channels are stored either
    # inside a 'data' group or at the top level of the file.
    with h5py.File(file_path, 'r') as exp:
        try:
            normal = np.around(np.mean(exp['data']['normal']), decimals=-3)
            len_data = len(exp['data']['shear'])
            shear = exp['data']['shear'][slice_set]
            lid_disp = exp['data']['lid_disp'][slice_set]
            velocity = exp['data']['velocity'][slice_set]
            load_disp = exp['data']['load_disp'][slice_set]
        except KeyError:
            # Fall back to the flat layout without a 'data' group.
            normal = exp['normal'][()]
            len_data = len(exp['shear'])
            shear = exp['shear'][slice_set]
            lid_disp = exp['lid_disp'][slice_set]
            velocity = exp['velocity'][slice_set]
            load_disp = exp['load_disp'][slice_set]

        if slice_set.start < 0:
            # Convert a negative (relative) slice into absolute indices so
            # it can be compared against the event database (eqs) below.
            new_start = len_data + slice_set.start
            new_end = len_data + slice_set.stop
            slice_set = slice(new_start, new_end)
        eqs = hdf5group_to_dict(exp['eqs'])
        Fs = exp['Fs'][()]

    # Create a slice for the preprocessed event database (eqs)
    eq = eqs['eqi']
    start = np.searchsorted(eq, slice_set.start, 'left')
    end = np.searchsorted(eq, slice_set.stop, 'right')
    slice_eqs = slice(start, end)
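    # For example (illustrative values): with eqi = [100, 400, 800] and
    # slice_set = slice(0, 500), this yields slice_eqs = slice(0, 2), i.e.
    # only the first two events fall inside the set.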
    # Create a new database, now referenced to the starting point of the set
    eqs_new = dict()
    for eqn in eqs.keys():
        eqs_new[eqn] = eqs[eqn][slice_eqs]-slice_set.start
    # Remove events whose end ('eqe') lies beyond the end of the set by
    # zeroing them out and dropping all zero entries afterwards.
    for i, eq in enumerate(eqs_new['eqe']):
        if eq > len(shear):
            for key in eqs_new.keys():
                eqs_new[key][i] = 0
    for key in eqs_new.keys():
        eqs_new[key] = eqs_new[key][eqs_new[key] != 0]
    try:
        # Some files also contain an 'eqf' dataset; filter it the same way.
        for i, eq in enumerate(eqs_new['eqf']):
            if eq > len(shear):
                for key in eqs_new.keys():
                    eqs_new[key][i] = 0
        for key in eqs_new.keys():
            eqs_new[key] = eqs_new[key][eqs_new[key] != 0]
    except KeyError:
        pass

    # Store in set file
    with h5py.File(os.path.join(set_dir_name, fname_set), 'w') as setf:
        setf.create_dataset('shear', data=shear, compression='gzip')
        setf.create_dataset('lid_disp', data=lid_disp, compression='gzip')
        setf.create_dataset('velocity', data=velocity, compression='gzip')
        setf.create_dataset('load_disp', data=load_disp, compression='gzip')
        setf.create_dataset('Fs', data=Fs)
        setf.create_dataset('normal', data=normal)
        dict_to_hdf5(setf, {'eqs': eqs_new})
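

if __name__ == '__main__':
    # Minimal self-test sketch (an addition for illustration, not part of the
    # original module): build a small synthetic experiment file in the flat
    # layout expected by slice_hdf5() and cut out its first half.
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    demo_file = os.path.join(tmp_dir, 'demo.h5')
    n = 1000
    with h5py.File(demo_file, 'w') as f:
        f.create_dataset('normal', data=5000.0)
        f.create_dataset('Fs', data=100.0)
        for channel in ['shear', 'lid_disp', 'velocity', 'load_disp']:
            f.create_dataset(channel, data=np.random.rand(n))
        eqs = f.create_group('eqs')
        # Hypothetical event start ('eqi') and end ('eqe') indices.
        eqs.create_dataset('eqi', data=np.array([100, 400, 800]))
        eqs.create_dataset('eqe', data=np.array([150, 450, 850]))

    slice_hdf5(demo_file, slice(0, n // 2), tmp_dir, 'demo_set.h5')
    with h5py.File(os.path.join(tmp_dir, 'demo_set.h5'), 'r') as f:
        print('contents:', sorted(f.keys()))
        print('events kept:', f['eqs']['eqi'][()])  # -> [100 400]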