Commit 7aad3349 authored by Javier Quinteros's avatar Javier Quinteros

Add type hints and docstrings

parent 9567ccec
......@@ -22,6 +22,7 @@ import os
from copy import copy
from datetime import timedelta
from obspy import UTCDateTime
from obspy import Trace
from abc import ABCMeta
from abc import abstractmethod
......@@ -30,20 +31,22 @@ class Archive(metaclass=ABCMeta):
"""Base class for the different structures an archive can have"""
@abstractmethod
def __init__(self, root, experiment, strictcheck):
def __init__(self, root: str, experiment: str, strictcheck: bool):
"""Define the root directory where files should be archived and
whether the archival process should strictly check the coherency of
the order of the records.
:param root: Directory where files should be archived
:type root: str
:param experiment: Initial and common part of the filenames
:type experiment: str
:param strictcheck: Flag declaring whether to check that this chunk can be appended when data already exists
:type strictcheck: bool
"""
pass
@abstractmethod
def archive(self, trace):
def archive(self, trace: Trace):
"""Archive mseed
:param trace: Trace to archive
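The interface above means a concrete archive only has to provide a constructor and an archive(trace) method. A minimal sketch of a subclass, assuming the base class is importable as dastools.archive.Archive (the class name and print-based body are hypothetical):

from obspy import Trace
from dastools.archive import Archive

class DummyArchive(Archive):
    """Toy archive that only reports what it would store (illustration only)"""

    def __init__(self, root: str = '.', experiment: str = None, strictcheck: bool = True):
        self.__root = root
        self.__experiment = experiment
        self.__strictcheck = strictcheck

    def archive(self, trace: Trace):
        # A real implementation would append the record to a file under self.__root
        print('Would archive %s starting at %s' % (trace.id, trace.stats.starttime))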
......@@ -57,14 +60,14 @@ class StreamBasedHour(Archive):
Output file should not contain GAPS"""
def __init__(self, root='.', experiment=None, strictcheck=True):
def __init__(self, root: str = '.', experiment: str = None, strictcheck: bool = True):
"""Define the root directory where files should be archived and
whether the archival process should strictly check the coherency of
the order of the records.
:param root: Directory where files should be archived
:type root: str
:param experiment: Name of the experiment
:param experiment: Initial and common part of the filenames
:type experiment: str
:param strictcheck: Flag declaring whether to check that this chunk can be appended when data already exists
:type strictcheck: bool
......@@ -74,7 +77,7 @@ class StreamBasedHour(Archive):
self.__strictcheck = strictcheck
self.__add2files = set()
def archive(self, trace):
def archive(self, trace: Trace):
"""Archive mseed
:param trace: Trace to archive
......@@ -138,13 +141,15 @@ class StreamBased(Archive):
GAPS will not be removed from the output file"""
def __init__(self, root='.', experiment=None, strictcheck=True):
def __init__(self, root: str = '.', experiment: str = None, strictcheck: bool = True):
"""Define the root directory where files should be archived and
whether the archival process should strictly check the coherency of
the order of the records.
:param root: Directory where files should be archived
:type root: str
:param experiment: Initial and common part of the filenames
:type experiment: str
:param strictcheck: Flag declaring whether to check that this chunk can be appended when data already exists
:type strictcheck: bool
"""
......@@ -154,7 +159,7 @@ class StreamBased(Archive):
self.__add2files = set()
self.__uniquestart = None
def archive(self, trace):
def archive(self, trace: Trace):
"""Archive mseed
:param trace: Trace to archive
......@@ -202,7 +207,7 @@ class StreamBased(Archive):
class SDS(Archive):
"""Class to archive miniSEED in an SDS structure"""
def __init__(self, root='.', experiment=None, strictcheck=True):
def __init__(self, root: str = '.', experiment: str = None, strictcheck: bool = True):
"""Define the root directory of the SDS structure
The structure is defined as
......@@ -210,6 +215,8 @@ class SDS(Archive):
:param root: Root directory of the SDS structure
:type root: str
:param experiment: Initial and common part of the filenames
:type experiment: str
:param strictcheck: Flag to declare if the miniSEED chunk should always be parsed to check proper directory structure
:type strictcheck: bool
"""
......@@ -219,7 +226,7 @@ class SDS(Archive):
if strictcheck:
logging.warning('Strict Check was not implemented in SDS class')
def archive(self, trace):
def archive(self, trace: Trace):
"""Archive mseed
:param trace: Trace to archive
......
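The SDS layout referenced above is elided by the diff. For orientation, the standard SeisComP SDS convention stores one file per stream and day; a sketch of the path construction under that assumption (sds_path is a hypothetical helper, not part of the class):

import os
from obspy import UTCDateTime

def sds_path(root: str, net: str, sta: str, loc: str, cha: str, t: UTCDateTime) -> str:
    # <ROOT>/<YEAR>/<NET>/<STA>/<CHAN>.D/<NET>.<STA>.<LOC>.<CHAN>.D.<YEAR>.<DAY>, where 'D' marks waveform data
    return os.path.join(root, '%d' % t.year, net, sta, '%s.D' % cha,
                        '%s.%s.%s.%s.D.%d.%03d' % (net, sta, loc, cha, t.year, t.julday))

print(sds_path('.', 'XX', 'PDN', '', 'FSF', UTCDateTime(2021, 2, 1)))
# ./2021/XX/PDN/FSF.D/XX.PDN..FSF.D.2021.032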
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
###################################################################################################
# (C) 2021 Helmholtz Centre Potsdam GFZ German Research Centre for Geosciences, Potsdam, Germany #
# #
# This file is part of dastools. #
# #
# dastools is free software: you can redistribute it and/or modify it under the terms of the GNU #
# General Public License as published by the Free Software Foundation, either version 3 of the #
# License, or (at your option) any later version. #
# #
# dastools is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without #
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License along with this program. If #
# not, see https://www.gnu.org/licenses/. #
###################################################################################################
"""dasconv tool
This file is part of dastools.
dastools is free software: you can redistribute it and/or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
dastools is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If
not, see https://www.gnu.org/licenses/.
:Copyright:
2021 Helmholtz Centre Potsdam GFZ German Research Centre for Geosciences, Potsdam, Germany
:License:
GPLv3
:Platform:
Linux
.. moduleauthor:: Javier Quinteros <javier@gfz-potsdam.de>, GEOFON, GFZ Potsdam
"""
import argparse
import sys
......@@ -30,13 +38,14 @@ import dastools.archive as da
import inspect
def nslc(dataheader):
def nslc(dataheader: dict) -> str:
"""Get a NSLC code from a dictionary with its components
:param dataheader: Dictionry with components of a NSLC code
:param dataheader: Dictionary with components of a NSLC code
:type dataheader: dict
:return: NSLC code
:rtype: str
:raise KeyError: if keys 'network', 'station', 'location', or 'channel' are not present
"""
return '%s.%s.%s.%s' % (dataheader['network'].upper(), dataheader['station'].upper(),
dataheader['location'].upper(), dataheader['channel'].upper())
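For example, with a hypothetical header dictionary:

header = {'network': 'XX', 'station': 'PDN', 'location': '', 'channel': 'FSF'}
print(nslc(header))  # XX.PDN..FSF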
......@@ -53,13 +62,14 @@ def printmetadata(data):
print(data)
def str2date(dstr):
def str2date(dstr: str) -> datetime.datetime:
"""Transform a string to a datetime.
:param dstr: A datetime in ISO format.
:type dstr: string
:type dstr: str
:return: A datetime representing the converted input.
:rtype: datetime.datetime
:raise ValueError: if no integers are found as components of the string
"""
# In case of empty string
if (dstr is None) or (not len(dstr)):
......
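The rest of the body is elided here. A minimal sketch of the documented behaviour, assuming ISO 8601 input and using datetime.fromisoformat (one possible implementation, not necessarily the author's):

import datetime

def str2date_sketch(dstr: str) -> datetime.datetime:
    # Empty input yields None, as in the check above
    if (dstr is None) or (not len(dstr)):
        return None
    # fromisoformat raises ValueError on a malformed string
    return datetime.datetime.fromisoformat(dstr)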
###################################################################################################
# (C) 2021 Helmholtz Centre Potsdam GFZ German Research Centre for Geosciences, Potsdam, Germany #
# #
# This file is part of dastools. #
# #
# dastools is free software: you can redistribute it and/or modify it under the terms of the GNU #
# General Public License as published by the Free Software Foundation, either version 3 of the #
# License, or (at your option) any later version. #
# #
# dastools is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without #
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
# General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License along with this program. If #
# not, see https://www.gnu.org/licenses/. #
###################################################################################################
"""TDMS module from dastools.
This file is part of dastools.
dastools is free software: you can redistribute it and/or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
dastools is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If
not, see https://www.gnu.org/licenses/.
:Copyright:
2021 Helmholtz Centre Potsdam GFZ German Research Centre for Geosciences, Potsdam, Germany
:License:
GPLv3
:Platform:
Linux
.. moduleauthor:: Javier Quinteros <javier@gfz-potsdam.de>, GEOFON, GFZ Potsdam
"""
import logging
import datetime
......@@ -28,12 +36,20 @@ from math import ceil
from numbers import Number
# Some examples with PDN_1km show that for 16 files of 44 MB (704 MB) we see the following
# With simple encoding of I16 and a record of 4096 bytes we use 720MB
# With STEIM2, a data type of I32 and a record of 4096 bytes we use 504 MB
# With STEIM2, a data type of I32, a decimation factor of 5, and a record of 4096 bytes we use 109 MB
def tup2time(fraction, seconds):
def tup2time(fraction: int, seconds: int) -> datetime.datetime:
"""Convert a tuple of fraction and seconds into a timestamp
:param fraction: The least significant 64 bits should be interpreted as a 64-bit unsigned integer.
It represents the number of 2^-64 seconds after the whole seconds specified in the most significant 64 bits.
:type fraction: int
:param seconds: The most significant 64 bits should be interpreted as a 64-bit signed two's complement integer.
It represents the number of whole seconds after the Epoch 01/01/1904 00:00:00.00 UTC.
:type seconds: int
:returns: Datetime of the timestamp
:rtype: datetime.datetime
.. todo:: Check in case of little endian if the parameters are not swapped
"""
# logs = logging.getLogger('tup2time')
# logs.debug('seconds: %s' % seconds)
# logs.debug('fraction: %s' % fraction)
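The body of tup2time is elided by the diff; per the docstring, the conversion amounts to adding the whole seconds and the 2^-64-scaled fraction to the 1904 epoch. A sketch under that reading (the function name is hypothetical):

import datetime

def tup2time_sketch(fraction: int, seconds: int) -> datetime.datetime:
    # LabVIEW/TDMS epoch: 01/01/1904 00:00:00 UTC
    epoch = datetime.datetime(1904, 1, 1)
    return epoch + datetime.timedelta(seconds=seconds + fraction * 2**-64)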
......@@ -46,15 +62,25 @@ def tup2time(fraction, seconds):
class PotentialGap(Exception):
"""Exception to signal that a gap has been found"""
pass
class NoData(Exception):
"""Exception to signal that there is no data"""
pass
class TDMS(object):
"""Class to read and export seismic waveforms in TDMS format"""
"""Class to read, process and export seismic waveforms in TDMS format
.. note::
Some examples with PDN_1km show that for 16 files of 44 MB (704 MB) we see the following:
With simple encoding of I16 and a record of 4096 bytes we use 720 MB
With STEIM2, a data type of I32 and a record of 4096 bytes we use 504 MB
With STEIM2, a data type of I32, a decimation factor of 5, and a record of 4096 bytes we use 109 MB
"""
def __exit__(self, exc_type, exc_val, exc_tb):
"""Method which close open resources after using the syntax 'with object:' and use it inside"""
......@@ -62,20 +88,21 @@ class TDMS(object):
self.__fi.close()
def __enter__(self):
"""Method which allows to use the syntax 'with object:' and use it inside"""
"""Method which allows to use the syntax 'with object:' and use it inside
# Create a buffer space to store the signal coefficients to be
# convoluted during the decimation
# for channel in range(self.__chstart, self.__chstop + 1, self.__chstep):
Create a buffer space to store the signal coefficients to be convolved during the decimation
"""
for channel in self.__channels:
logging.debug('Create empty buffer for channel %s' % channel)
self.__buffer[channel] = None
return self
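Since __enter__ returns self and __iter__ yields (data, stats) tuples, the class is meant to be driven as a context manager. A hypothetical usage sketch (the module path, directory and selection values are assumptions):

from dastools.input.tdms import TDMS  # assumed import path

with TDMS('PDN_1km', directory='.', chstart=0, chstop=10, decimate=5) as t:
    for data, stats in t:
        print(stats.station, stats.starttime, len(data))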
def __init__(self, filename, directory='.', chstart=0, chstop=None, chstep=1, channels=None,
starttime=None, endtime=None, iterate='D', decimate=1, firfilter='fir235',
networkcode='XX', channelcode='FSF', loglevel='INFO'):
def __init__(self, filename: str, directory: str = '.', chstart: int = 0, chstop: int = None,
chstep: int = 1, channels: list = None, starttime: datetime.datetime = None,
endtime: datetime.datetime = None, iterate: str = 'D', decimate: int = 1,
firfilter: str = 'fir235', networkcode: str = 'XX', channelcode: str = 'FSF',
loglevel: str = 'INFO'):
"""Initialize the TDMS object selecting the data, channels and decimation
:param filename: Experiment to read and process. Usually the first part of the filenames
......@@ -106,6 +133,10 @@ class TDMS(object):
:type channelcode: str
:param loglevel: Verbosity in the output
:type loglevel: str
:raise TypeError: If chstart, chstop, or chstep is not an int; if channels is not a list; if networkcode is
not a two-character code; or if channelcode is not a three-character code.
:raise Exception: If channels is empty.
:raise NoData: If there is no more data available
"""
# Log level
......@@ -120,12 +151,12 @@ class TDMS(object):
# Selection of channels
if not isinstance(chstart, Number) or not isinstance(chstep, Number):
logs.error('chstart and chstep must be numbers and preferably ints')
raise TypeError('chstart and chstep must be numbers and preferably ints')
logs.error('chstart and chstep must be ints')
raise TypeError('chstart and chstep must be ints')
if not isinstance(chstop, Number) and chstop is not None:
logs.error('chstop must be a number or None')
raise TypeError('chstop must be a number or None')
logs.error('chstop must be an int or None')
raise TypeError('chstop must be an int or None')
if not isinstance(channels, list) and channels is not None:
logs.error('channels must be a list of numbers or None')
......@@ -261,6 +292,12 @@ class TDMS(object):
raise NoData()
def __select_file(self):
"""Select a file from the experiment based on the status of the object
:raise Exception: If no data is available in the specified time window, or if the header does not
indicate that the file is in TDMS format
:raise IndexError: If the last file has already been processed or the start is greater than end
"""
logs = logging.getLogger('Select file')
logs.setLevel(self.__loglevel)
......@@ -357,7 +394,9 @@ class TDMS(object):
def __search_data(self):
"""
:raise: IndexError
Select a file to work with, read its metadata and calculate samples to read
:raise IndexError: If the last file has already been processed or the start is greater than end
"""
while True:
......@@ -375,12 +414,21 @@ class TDMS(object):
# Rename to make it accessible to users
def reset(self):
"""Reset the status of the object and start the read again
:raise IndexError: If the last file has already been processed or the start is greater than end
"""
self.__twstart = self.__origstarttime
self.__twend = self.__origendtime
self.__currentfile = None
self.__search_data()
def __readmetadata(self):
"""Read metadata of the current file
:raise Exception: if datatype definition is not found in any channel, or if the data type is not
supported, or if no valid channel IDs could be selected
"""
# Metadata
logs = logging.getLogger('Read Metadata')
# handler = logging.StreamHandler(sys.stdout)
......@@ -522,7 +570,11 @@ class TDMS(object):
# newobjects = struct.unpack('%cI' % self.__endian, self.__fi.read(4))[0]
def __iter__(self):
"""Iterate through data (or metadata) and filter and decimate if requested"""
"""Iterate through data (or metadata) and filter and decimate if requested
:returns: Data and attributes for the header, or metadata
:rtype: tuple(numpy.array, obspy.core.trace.Stats) or dict
"""
# Create logger
logs = logging.getLogger('__iter__')
......@@ -536,7 +588,7 @@ class TDMS(object):
# If no decimation is needed
if self.__decimate == 1:
for data, stats in self.__iter_data__():
yield (data.astype(self.__outdatatype), stats)
yield data.astype(self.__outdatatype), stats
else:
# Use an input buffer to store the data coming in chunks from __iter_data__
inbuf = dict()
......@@ -620,13 +672,17 @@ class TDMS(object):
logs.debug('filtered: leave %d components for next iteration %s' % (leftover, nodecimation[ch][-leftover:]))
if leftover:
if 'data' not in outbuf[ch]:
# FIXME Decimation factor is hardcoded
outbuf[ch]['data'] = nodecimation[ch][:-leftover][::5]
else:
# FIXME Decimation factor is hardcoded
outbuf[ch]['data'] = np.append(outbuf[ch]['data'], nodecimation[ch][:-leftover][::5])
else:
if 'data' not in outbuf[ch]:
# FIXME Decimation factor is hardcoded
outbuf[ch]['data'] = nodecimation[ch][::5]
else:
# FIXME Decimation factor is hardcoded
outbuf[ch]['data'] = np.append(outbuf[ch]['data'], nodecimation[ch][::5])
logs.debug('outbuf[%d][:11] %s' % (ch, outbuf[ch]['data'][:11]))
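The FIXME comments above flag that the factor behind [::5] is hardcoded instead of using the decimate parameter; the leftover bookkeeping keeps the stride aligned across chunk boundaries. A standalone toy illustration of that idea (not the class code):

import numpy as np

factor = 5
carry = np.empty(0, dtype=int)
for chunk in (np.arange(12), np.arange(12, 23)):
    buf = np.append(carry, chunk)
    leftover = len(buf) % factor
    if leftover:
        out = buf[:-leftover][::factor]
        carry = buf[-leftover:]
    else:
        out = buf[::factor]
        carry = np.empty(0, dtype=int)
    print(out)  # [0 5], then [10 15]; sample 20 stays in carry for the next chunk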
......@@ -657,7 +713,11 @@ class TDMS(object):
yield outbuf[ch]['data'], outbuf[ch]['stats']
def __iter_data__(self):
"""Read data from files based on channel selection"""
"""Read data from files based on channel selection
:return: Data and attributes for the header
:rtype: tuple(numpy.array, obspy.core.trace.Stats)
"""
# Data
logs = logging.getLogger('Iterate Data')
......@@ -692,6 +752,11 @@ class TDMS(object):
break
def __iter_metadata__(self):
"""Read metadata from files based on channel selection
:return: Metadata from selected channels
:rtype: dict
"""
# Metadata
# logs = logging.getLogger('Iterate Metadata')
......@@ -711,12 +776,18 @@ class TDMS(object):
except IndexError:
break
def __readstring(self):
"""All strings in TDMS files, such as object paths, property names, property values, and raw data values,
def __readstring(self) -> str:
"""Read a string from a TDMS file
All strings in TDMS files, such as object paths, property names, property values, and raw data values,
are encoded in UTF-8 Unicode. All of them, except for raw data values, are preceded by a 32-bit unsigned
integer that contains the length of the string in bytes, not including the length value itself. Strings in
TDMS files can be null-terminated, but since the length information is stored, the null terminator will be
ignored when you read from the file.
:return: String read
:rtype: str
"""
# logs = logging.getLogger('readstring')
strlen = struct.unpack('%cI' % self.__endian, self.__fi.read(4))
......@@ -724,6 +795,11 @@ class TDMS(object):
return self.__fi.read(strlen[0]).decode()
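A self-contained demonstration of the length-prefixed string format described above, using an in-memory buffer and assuming little-endian byte order:

import io
import struct

fi = io.BytesIO(struct.pack('<I', 5) + b'hello')
strlen = struct.unpack('<I', fi.read(4))[0]  # 32-bit unsigned length prefix
print(fi.read(strlen).decode())              # hello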
def __readvalue(self):
"""Read a value from a TDMS file
:return: Value
:rtype: char or int or float or str or bool or datetime.datetime
"""
# logs = logging.getLogger('readvalue')
datatype = self.__readdatatype()
......@@ -749,11 +825,18 @@ class TDMS(object):
# logs.debug('result: %s' % result)
return result
def __readproperties(self, result=None):
"""For each property, the following information is stored:
def __readproperties(self, result: dict = None) -> dict:
"""Read one or many properties and save them in a dictionary
:param result: Where to store the properties read
:type result: dict
:return: Properties read
:rtype: dict
For each property, the following information is stored:
Name (string)
Data type (tdsDataType)
Value (numerics stored binary, strings stored as explained above).
Value (numeric stored binary, string stored as explained above).
"""
logs = logging.getLogger('readproperties')
......@@ -773,17 +856,19 @@ class TDMS(object):
return result
def __readdatatype(self):
def __readdatatype(self) -> int:
"""Read data type of a field in TDMS format"""
return struct.unpack('%cI' % self.__endian, self.__fi.read(4))[0]
def __readdata(self, channels=None):
def __readdata(self, channels: list = None) -> dict:
"""Read a chunk of data from the specified channels.
Update the attribute __samplecur
:param channels: List of channel numbers to read data from
:type channels: list
:return: Dictionary with channel as key and a numpy array with data as value
:return: Dictionary with channel number as key and a numpy array with data as value
:rtype: dict
:raise Exception: if trying to read data from an originally unselected channel
"""
logs = logging.getLogger('Read data')
......