Commit 407300db authored by Michael Rudolf's avatar Michael Rudolf

Comment updates

 - Added more comments
 - Removed add_h5.py which was a debug remainder
 - Restructured requirements.txt
parent 76724402
#!/usr/bin/env python3
"""
Debug helper: copies the earthquake catalogue ('eqs') datasets from a
companion '<name>_eqs.h5' file into the main '<name>.h5' file.

The user selects one or more main HDF5 files via a file dialog; for each
one, any stale 'eqs' group is removed and the datasets from the companion
file are written back through utils.dict_to_hdf5.
"""
import h5py
from tkinter import Tk, filedialog
from tqdm import tqdm

import utils

# Hide the empty Tk root window; only the file dialog is needed.
root = Tk()
root.withdraw()
file_list = filedialog.askopenfilenames(
    filetypes=(('hdf', '*.h5'),)
)
root.destroy()

for file_path in tqdm(file_list):
    # Context managers guarantee the HDF5 handles are closed even on error.
    with h5py.File(file_path, 'r+') as f_main:
        # Drop any stale 'eqs' group before re-adding it.
        if 'eqs' in f_main:
            del f_main['eqs']
        # The companion file is only read from, so open it read-only.
        # It must stay open while dict_to_hdf5 copies its datasets.
        with h5py.File(file_path.replace('.h5', '_eqs.h5'), 'r') as f_eqs:
            eqs = {key: f_eqs[key] for key in f_eqs.keys()}
            utils.dict_to_hdf5(f_main, {'eqs': eqs})
\ No newline at end of file
......@@ -3,6 +3,8 @@
'''
eventfinder.py: Finding events and computing simple statistics
Only needed to separate velocities at this stage.
__AUTHOR__: Michael Rudolf
__DATE__: 16-Aug-2018
......
......@@ -3,8 +3,13 @@
'''
feature_functions.py
Module containing all feature funtions to be generated.
Module containing all feature functions to be generated. feature-generation.py
takes a look into this file and loops over all functions in here.
All feature functions are required to take a 1D-matrix as input and output a
scalar value. The feature generation script takes the name of the function as
the name of the feature. The function should be prefixed with 'do_' which is
later removed.
__AUTHOR__: Jon Bedford, Michael Rudolf
__DATE__: 26-Feb-2019
......@@ -91,7 +96,7 @@ def do_autocorrelation(x):
def do_binned_entropy(x):
    """
    Return the natural-log entropy of the binned value distribution of x.

    Input:
        x: 1D array-like of data values
    Output:
        scalar entropy of the histogram bin probabilities
    """
    max_bins = 5
    # Only the counts per bin are needed, not the bin edges.
    hist, _ = np.histogram(x, bins=max_bins)
    probs = hist / len(x)
    # Drop empty bins so log(0) never occurs.
    probs = probs[np.nonzero(probs)]
    return -np.sum(probs * np.log(probs))
......
......@@ -5,7 +5,6 @@ feature_generation.py
Module that generates all features.
__AUTHOR__: Michael Rudolf
__DATE__: 26-Feb-2019
......@@ -33,6 +32,19 @@ import modules.utils as utils
def run(prj):
'''
Runs a full feature extraction for the given project.
Utilizes more_itertools.windowed to iterate over a hdf5 dataset returning a
dictionary containing the features as a m by n matrix and the feature names
as an 1 by n list.
Furthermore the matrix is saved to disk as a hdf5 file for quick access
later on.
Input
prj: A valid project file containing the moving window parameters and
file paths.
Output
feature_dict: Dictionary with results
'''
# Moving Windows Parameters
......@@ -69,6 +81,7 @@ def run(prj):
shear = hf['shear']
lid = hf['lid_disp']
# Calculate shape of feature matrix for preallocation
nfeat = len(f_list)
nwins = int(np.floor((len(shear)-step)/step))+1
......@@ -124,7 +137,15 @@ def run(prj):
def create_features(window, f_list):
''' Uses the functions given in f_list to calculate features '''
'''
Uses the functions given in f_list to calculate features
Input:
window: 1D array containing the data
f_list: list of functions to be applied to the window
Output:
features: 1D array of results
'''
features = np.zeros(len(f_list))
for (i, fnc) in enumerate(f_list):
features[i] = fnc[1](window)
......
......@@ -9,8 +9,10 @@ functions should be added here.
def event_label(parameter_list):
    """
    Label features according to eqs-events from an additional file.

    Not implemented yet; calling it always raises NotImplementedError.
    """
    raise NotImplementedError
def time_to_fail(parameter_list):
    """
    Label features by time to failure.

    Not implemented yet; calling it always raises NotImplementedError.
    """
    raise NotImplementedError
......@@ -18,7 +18,7 @@ from scipy.signal import butter, filtfilt
def filter_data(data, cutoff, fs):
    """
    Flatten the data and apply the Butterworth lowpass filter to it.

    Input:
        data: 1D array of raw data
        cutoff: filter cutoff frequency
        fs: sampling frequency
    Output:
        filtered, flattened data
    """
    flat_data = get_flattened(data)
    # Filter the flattened signal, not the raw input (previous bug), and
    # avoid shadowing this function's own name with the local result.
    filtered = butterworth_filter(flat_data, cutoff, fs)
    return filtered
......
......@@ -48,23 +48,24 @@ def project(**kwargs):
def create_default_project(file_path='default_parameters.smad'):
''' Contains the default entries for the project '''
prj = configparser.ConfigParser()
# Basic parameters and paths for computation
prj['params'] = {
'directory': '',
'file_path': file_path,
'data_path': file_path.replace('smad', 'h5'),
'lowpass_cutoff': 150,
'lowpass_order': 5,
'min_cycles': 5,
'min_win': 10,
'step_frac': 1,
'window': 30,
'max_dur': '',
'num_sets': '',
'num_subsets': '',
'use_filter': False,
'eqs_path': file_path.replace('.smad', '_eqs.h5'),
'lowpass_cutoff': 150, # Cutoff frequency for lowpass filter
'lowpass_order': 5, # (Unused currently), filter order
'min_cycles': 5, # Minimum number of seismic cycles in subset
'min_win': 10, # Minimum number of windows per cycle
'step_frac': 1, # Step fraction of sliding window (step/window)
'window': 30, # Window size
'max_dur': '', # Longest cycle in set (in sample units)
'num_sets': '', # Total number of sets
'num_subsets': '', # Total number of subsets
'use_filter': False, # Use flattening and filtering
}
# Tracks state and paths of project
prj['state'] = {
'feature_file': '',
'features_created': False,
......@@ -78,24 +79,28 @@ def create_default_project(file_path='default_parameters.smad'):
def update_param(prj, **kwargs):
    """
    Update a set of parameters in the project and persist it.

    Input:
        prj: project ConfigParser object
        kwargs: parameter name/value pairs written into prj['params']
    Output:
        the project as returned by save_project (saved and re-read)
    """
    # Iterate items() directly instead of keys() + repeated indexing.
    for key, value in kwargs.items():
        prj['params'][key] = value
    return save_project(prj)
def update_state(prj, **kwargs):
    """
    Update a set of states in the project and persist it.

    Input:
        prj: project ConfigParser object
        kwargs: state name/value pairs written into prj['state']
    Output:
        the project as returned by save_project (saved and re-read)
    """
    # Iterate items() directly instead of keys() + repeated indexing.
    for key, value in kwargs.items():
        prj['state'][key] = value
    return save_project(prj)
def save_project(prj):
    """Write the project configuration to its file, then reload it."""
    target = prj['params']['file_path']
    with open(target, 'w') as prj_file:
        prj.write(prj_file)
    # Re-read so the caller gets the state exactly as stored on disk.
    return read_project(prj)
def read_project(prj):
    """Read the project configuration back from its file and return it."""
    path = prj['params']['file_path']
    with open(path, 'rt') as prj_file:
        prj.read_file(prj_file)
    return prj
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment