Commit aa1029c7 authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Added logging to ClusterClassifier_Generator and RSImage_ClusterPredictor.

parent a2e8b975
Pipeline #3205 failed with stage
in 17 minutes and 39 seconds
......@@ -1205,13 +1205,26 @@ class RefCube(object):
class ClusterClassifier_Generator(object):
"""Class for creating collections of machine learning classifiers that can be used for spectral homogenization."""
def __init__(self, list_refcubes):
# type: (List[Union[str, RefCube]]) -> None
def __init__(self, list_refcubes, logger=None):
# type: (List[Union[str, RefCube]], logging.Logger) -> None
"""Get an instance of Classifier_Generator.
:param list_refcubes: list of RefCube instances for which the classifiers are to be created.
:param logger: instance of logging.Logger()
"""
self.refcubes = [RefCube(inRC) if isinstance(inRC, str) else inRC for inRC in list_refcubes]
self.logger = logger or GMS_logger(__name__) # must be pickable
def __getstate__(self):
    """Return the state of a ClusterClassifier_Generator instance to be pickled.

    The logger is handed to close_logger() and is replaced by None in the returned state only.
    The instance itself keeps its 'logger' attribute, so calls like self.logger.info() that are
    made after pickling do not fail on None. An unpickled copy has 'logger' set to None.

    :return: shallow copy of the instance __dict__ with 'logger' set to None
    """
    close_logger(self.logger)

    # return a copy instead of self.__dict__ itself so that pickling does not modify the live instance
    state = self.__dict__.copy()
    state['logger'] = None
    return state
def __del__(self):
    """Close the logger of the instance when it is finalized."""
    # __del__ is also called for instances whose __init__ raised before 'logger' was assigned,
    # so the attribute must not be taken for granted here
    logger = getattr(self, 'logger', None)
    if logger is not None:
        close_logger(logger)
    self.logger = None
@staticmethod
def _get_derived_LayerBandsAssignments(satellite, sensor):
......@@ -1352,8 +1365,8 @@ class ClusterClassifier_Generator(object):
# validate and set defaults
if method == 'RFR':
if n_clusters > 1:
warnings.warn("The spectral homogenization method 'Random Forest Regression' does not allow spectral "
"sub-clustering. Setting 'n_clusters' to 1.")
self.logger.warning("The spectral homogenization method 'Random Forest Regression' does not allow "
"spectral sub-clustering. Setting 'n_clusters' to 1.")
n_clusters = 1
if 'n_jobs' not in kwargs:
......@@ -1370,16 +1383,17 @@ class ClusterClassifier_Generator(object):
n_clusters=n_clusters)
# get cluster labels for each source cube separately
print('Clustering %s %s reference cube (%s clusters)...'
% (src_cube.satellite, src_cube.sensor, n_clusters))
self.logger.info('Clustering %s %s reference cube (%s clusters)...'
% (src_cube.satellite, src_cube.sensor, n_clusters))
labels1D = self.cluster_refcube_spectra(src_cube, n_clusters=n_clusters, CPUs=CPUs)
for tgt_cube in self.refcubes:
if (src_cube.satellite, src_cube.sensor) == (tgt_cube.satellite, tgt_cube.sensor):
continue
clf_str = 'classifier' if n_clusters == 1 else 'cluster classifier'
print("Creating %s %s to predict %s %s from %s %s..."
% (method, clf_str, tgt_cube.satellite, tgt_cube.sensor, src_cube.satellite, src_cube.sensor))
self.logger.info("Creating %s %s to predict %s %s from %s %s..."
% (method, clf_str, tgt_cube.satellite, tgt_cube.sensor,
src_cube.satellite, src_cube.sensor))
src_derived_LBAs = self._get_derived_LayerBandsAssignments(src_cube.satellite, src_cube.sensor)
tgt_derived_LBAs = self._get_derived_LayerBandsAssignments(tgt_cube.satellite, tgt_cube.sensor)
......@@ -1387,8 +1401,8 @@ class ClusterClassifier_Generator(object):
for src_LBA in src_derived_LBAs:
for tgt_LBA in tgt_derived_LBAs:
# print('Creating %s cluster classifier for LBA %s => %s...'
# % (method, '_'.join(src_LBA), '_'.join(tgt_LBA)))
self.logger.debug('Creating %s cluster classifier for LBA %s => %s...'
% (method, '_'.join(src_LBA), '_'.join(tgt_LBA)))
# Get center wavelength positions
# NOTE: they cannot be taken from RefCube instances because they always represent L1A LBAs
......@@ -1410,7 +1424,7 @@ class ClusterClassifier_Generator(object):
tgt_df.insert(0, 'cluster_label', labels1D)
for clusterlabel in range(n_clusters):
# print('Creating %s classifier for cluster %s...' % (method, clusterlabel))
self.logger.debug('Creating %s classifier for cluster %s...' % (method, clusterlabel))
# Set train and test variables
# NOTE: If random_state is set to an Integer,
......@@ -1512,8 +1526,8 @@ class ClassifierCollection(object):
class RSImage_ClusterPredictor(object):
"""Predictor class applying the predict() function of a machine learning classifier described by the given args."""
def __init__(self, method='LR', n_clusters=50, classif_alg='MinDist', kNN_n_neighbors=10, classifier_rootDir='',
CPUs=1):
# type: (str, int, str, int, str, Union[None, int]) -> None
CPUs=1, logger=None):
# type: (str, int, str, int, str, Union[None, int], logging.Logger) -> None
"""Get an instance of RSImage_ClusterPredictor.
:param method: machine learning approach to be used for spectral bands prediction
......@@ -1536,6 +1550,7 @@ class RSImage_ClusterPredictor(object):
'kNN'. Otherwise, this parameter is ignored.
:param classifier_rootDir: root directory where machine learning classifiers are stored.
:param CPUs: number of CPUs to use
:param logger: instance of logging.Logger()
"""
self.method = method
self.n_clusters = n_clusters
......@@ -1544,13 +1559,25 @@ class RSImage_ClusterPredictor(object):
self.classif_map = None
self.CPUs = CPUs
self.classif_alg = classif_alg
self.logger = logger or GMS_logger(__name__) # must be pickable
# validate
if method == 'RFR' and n_clusters > 1:
warnings.warn("The spectral homogenization method 'Random Forest Regression' does not allow spectral sub-"
"clustering. Setting 'n_clusters' to 1.")
self.logger.warning("The spectral homogenization method 'Random Forest Regression' does not allow spectral "
"sub-clustering. Setting 'n_clusters' to 1.")
self.n_clusters = 1
def __getstate__(self):
    """Return the state of a RSImage_ClusterPredictor instance to be pickled.

    The logger is handed to close_logger() and is replaced by None in the returned state only.
    The instance itself keeps its 'logger' attribute, so calls like self.logger.info() that are
    made after pickling do not fail on None. An unpickled copy has 'logger' set to None.

    :return: shallow copy of the instance __dict__ with 'logger' set to None
    """
    close_logger(self.logger)

    # return a copy instead of self.__dict__ itself so that pickling does not modify the live instance
    state = self.__dict__.copy()
    state['logger'] = None
    return state
def __del__(self):
    """Close the logger of the instance when it is finalized."""
    # __del__ is also called for instances whose __init__ raised before 'logger' was assigned,
    # so the attribute must not be taken for granted here
    logger = getattr(self, 'logger', None)
    if logger is not None:
        close_logger(logger)
    self.logger = None
def get_classifier(self, src_satellite, src_sensor, src_LBA, tgt_satellite, tgt_sensor, tgt_LBA):
# type: (str, str, list, str, str, list) -> Cluster_Learner
"""Select the correct machine learning classifier out of previously saves classifier collections.
......@@ -1624,7 +1651,8 @@ class RSImage_ClusterPredictor(object):
kNN_n_neighbors=self.kNN_n_neighbors,
nodataVal=cmap_nodataVal, # written into classif_map at nodata pixels
CPUs=self.CPUs)
print('Total classification time: %s' % time.strftime("%H:%M:%S", time.gmtime(time.time() - t0)))
self.logger.info('Total classification time: %s'
% time.strftime("%H:%M:%S", time.gmtime(time.time() - t0)))
else:
self.classif_map = np.full((image.rows, image.cols), classifier.cluster_pixVals[0], np.int8)
......@@ -1641,7 +1669,7 @@ class RSImage_ClusterPredictor(object):
t0 = time.time()
for ((rS, rE), (cS, cE)), im_tile in image.tiles(tilesize=(1000, 1000)):
print('Predicting tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
self.logger.info('Predicting tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
classif_map_tile = self.classif_map[rS: rE+1, cS: cE+1] # integer array
......@@ -1650,7 +1678,7 @@ class RSImage_ClusterPredictor(object):
nodataVal=nodataVal, cmap_nodataVal=cmap_nodataVal).astype(image.dtype)
image_predicted[rS:rE + 1, cS:cE + 1] = im_tile_pred
print('Total prediction time: %s' % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0)))
self.logger.info('Total prediction time: %s' % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0)))
# re-apply nodata values to predicted result
if image.nodata is not None:
......@@ -1695,7 +1723,7 @@ class RSImage_ClusterPredictor(object):
if pixVal == cmap_nodataVal:
continue
print('Inpainting error values for cluster #%s...' % pixVal)
self.logger.info('Inpainting error values for cluster #%s...' % pixVal)
rmse_per_band_int = np.round(cluster_classifier.MLdict[pixVal].rmse_per_band, 0).astype(np.int16)
errors[self.classif_map == pixVal] = rmse_per_band_int
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment