Commit 23397565 authored by Michael Rudolf

First touches in FeatureGeneration

- Added a first base for feature generation
- Changed the preparation_main script to use all files in the 
ExampleData folder
parent d5b64d3e
......@@ -167,7 +167,7 @@
"source": [
"window=30\n",
"step_frac=1\n",
"min_cycles=3\n",
"min_cycles=5\n",
"min_win=10\n",
"\n",
"''' The function preparation.omit_sets() does this part'''\n",
......@@ -290,6 +290,17 @@
"fig.show()\n",
"fig.savefig(subset_dir_name+'/OverviewOfSubsets')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with h5py.File(subset_dir_name+'/'+subset_file, 'r') as subset:\n",
" subset['lid_disp'][()]\n",
" "
]
}
],
"metadata": {
......
......@@ -24,44 +24,50 @@ import preparation
# List of file paths, depending on where I run the script
file_paths = {
'home_office': 'C:/Users/Michael/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/b_5kPa_371-01-27-GB300.h5',
'lab': 'C:/Users/M.Rudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/b_5kPa_371-01-27-GB300.h5',
'office': '~/home/mrudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/b_5kPa_371-01-27-GB300.h5'
'home_office': 'C:/Users/Michael/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
'lab': 'C:/Users/M.Rudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/',
'office': '~/home/mrudolf/ownCloud/DocStelle/GitRepos/shear-madness/0-data-preparation/ExampleData/'
}
file_path = file_paths['lab']
base_path = file_paths['lab']
file_list = [base_path+file for file in os.listdir(base_path) if file.endswith('h5')]
# Single-file preparation pipeline: create sets from one input file, drop the
# sets flagged for omission, build and visualize the subsets, then report the
# RAM needed per feature.
# Create sets and get back the new files as a list
print('Creating sets...')
set_file_list = preparation.create_sets(file_path)
# Use the root logger and let records of level INFO and above through
logger = logging.getLogger()
logger.setLevel(logging.INFO)
'''
Standard parameters for the subsets
window=30
step_frac=1
min_cycles=5
min_win=10
'''
# Find out which sets have to be omitted (also gets duration of longest cycle)
print('Calculating which sets to omit...')
(omit_total, max_dur) = preparation.omit_sets(set_file_list)
# Filter out the omitted datasets: omit_total is used as a collection of
# positions within set_file_list
set_list_new = [x for (i, x) in enumerate(set_file_list)
if i not in omit_total]
# Create subsets from the remaining sets; the returned name is what the
# visualization step below is pointed at
print('Creating subsets...')
subset_dir_name = preparation.create_subsets(set_list_new, max_dur)
# Visualize
print('Visualizing the subsets...')
preparation.visualize_subsets(subset_dir_name)
# Calculate RAM usage per feature (computed on the unfiltered set list)
print('Calculating RAM usage...')
ram_usage = preparation.ram_per_feature(set_file_list)
print('Done!')
print('Created', len(set_list_new),
'subsets from', len(set_file_list), 'sets.')
# NOTE(review): dividing by 1024 and printing "kB" presumes ram_usage is in
# bytes - confirm against preparation.ram_per_feature
print('Each feature will need %.2f kB of RAM!' % (ram_usage/1024))
# Run the complete preparation pipeline once for every input file: create
# sets, drop the sets flagged for omission, build and visualize the subsets,
# and report the RAM needed per feature.
for file_path in file_list:
    # Create sets and get back the new files as a list
    print('Creating sets...')
    set_file_list = preparation.create_sets(file_path)

    # Standard parameters for the subsets (kept for reference only; none of
    # the calls below passes them explicitly):
    #   window=30
    #   step_frac=1
    #   min_cycles=5
    #   min_win=10

    # Find out which sets have to be omitted (also gets duration of the
    # longest cycle)
    print('Calculating which sets to omit...')
    (omit_total, max_dur) = preparation.omit_sets(set_file_list)

    # Filter out the omitted datasets: omit_total is used as a collection of
    # positions within set_file_list
    set_list_new = [x for (i, x) in enumerate(set_file_list)
                    if i not in omit_total]

    # Create subsets from the remaining sets
    print('Creating subsets...')
    subset_dir_name = preparation.create_subsets(set_list_new, max_dur)

    # Visualize
    print('Visualizing the subsets...')
    preparation.visualize_subsets(subset_dir_name)

    # Calculate RAM usage per feature (computed on the unfiltered set list)
    print('Calculating RAM usage...')
    ram_usage = preparation.ram_per_feature(set_file_list)

    print('Done!')
    print('Created', len(set_list_new),
          'subsets from', len(set_file_list), 'sets.')
    # NOTE(review): dividing by 1024 and printing "kB" presumes ram_usage is
    # in bytes - confirm against preparation.ram_per_feature
    print('Each feature will need %.2f kB of RAM!' % (ram_usage/1024))
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import the necessary modules\n",
"import importlib\n",
"import h5py\n",
"import numpy as np\n",
"import os\n",
"import shutil\n",
"import logging\n",
"import matplotlib.pyplot as plt\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"\n",
"# List of file paths, depending on where I run the script\n",
"file_paths = {\n",
" 'home_office': 'C:/Users/Michael/ownCloud/DocStelle/GitRepos/shear-madness/1-feature-generation/ExampleData/b_5kPa_371-01-27-GB300_subsets/',\n",
" 'lab': 'C:/Users/M.Rudolf/ownCloud/DocStelle/GitRepos/shear-madness/1-feature-generation/ExampleData/b_5kPa_371-01-27-GB300_subsets/',\n",
" 'office': '~/home/mrudolf/ownCloud/DocStelle/GitRepos/shear-madness/1-feature-generation/ExampleData/b_5kPa_371-01-27-GB300_subsets/'\n",
"}\n",
"file_path = file_paths['lab']\n",
"\n",
"file_list = [f for f in os.listdir(file_path) if f.endswith('.h5')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for (i, file) in enumerate(file_list):\n",
" with h5py.File(file_path+file) as hf:\n",
" if i < 1:\n",
" shear = hf['shear'][()][:, None]\n",
" else:\n",
" shear = np.hstack([shear, hf['shear'][()][:, None]])\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Workflow\n",
"1. Take window\n",
"2. Detrend\n",
"3. Filter\n",
"4. Feature"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment