Commit 2ce6489d authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Added new config parameters 'spechomo_n_clusters', 'spechomo_classif_alg',...

Added new config parameters 'spechomo_n_clusters', 'spechomo_classif_alg', 'spechomo_kNN_n_neighbors' to control spectral homogenization through cluster learner prediction.
Cluster classifiers are now saved with float32 data instead of float64 to save memory.
parent 91f75477
......@@ -209,10 +209,10 @@ class SpectralHomogenizer(object):
return outarr
def predict_by_machine_learner(self, arrcube, method, src_satellite, src_sensor, src_LBA,
tgt_satellite, tgt_sensor, tgt_LBA, nodataVal=None, compute_errors=False,
bandwise_errors=True, **fallback_argskwargs):
# type: (Union[np.ndarray, GeoArray], str, str, str, list, str, str, list, int, bool, dict) -> tuple
def predict_by_machine_learner(self, arrcube, method, src_satellite, src_sensor, src_LBA, tgt_satellite, tgt_sensor,
tgt_LBA, n_clusters=50, classif_alg='MinDist', kNN_n_neighbors=10,
nodataVal=None, compute_errors=False, bandwise_errors=True, **fallback_argskwargs):
# type: (Union[np.ndarray, GeoArray], str, str, str, list, str, str, list, int, str, int, int, ...) -> tuple
"""Predict spectral bands of target sensor by applying a machine learning approach.
:param arrcube: input image array for target sensor spectral band prediction (rows x cols x bands)
......@@ -226,6 +226,19 @@ class SpectralHomogenizer(object):
:param tgt_satellite: target satellite, e.g., 'Landsat-8'
:param tgt_sensor: target sensor, e.g., 'OLI_TIRS'
:param tgt_LBA: target LayerBandsAssignment
:param n_clusters: Number of spectral clusters to be used during LR/ RR/ QR homogenization.
E.g., 50 means that the image to be converted to the spectral target sensor
is clustered into 50 spectral clusters and one separate machine learner per
cluster is applied to the input data to predict the homogenized image. If
'n_clusters' is set to 1, the source image is not clustered and
only one machine learning classifier is used for prediction.
:param classif_alg: Multispectral classification algorithm to be used to determine the spectral cluster
each pixel belongs to.
'MinDist': Minimum Distance (Nearest Centroid) Classification
'kNN': k-Nearest-Neighbor Classification
'SAM': Spectral Angle Mapping
:param kNN_n_neighbors: The number of neighbors to be considered in case 'classif_alg' is set to 'kNN'.
Otherwise, this parameter is ignored.
:param nodataVal: no data value
:param compute_errors: whether to compute pixel- / bandwise model errors for estimated pixel values
(default: false)
......@@ -236,11 +249,15 @@ class SpectralHomogenizer(object):
:rtype: Tuple[np.ndarray, Union[np.ndarray, None]]
"""
# TODO: add LBA validation to .predict()
if 'clust' in method:
PR = RSImage_ClusterPredictor(method=method.split('clust')[0], classifier_rootDir=self.classifier_rootDir,
classif_alg='MinDist') # FIXME hardcoded classif_alg
if n_clusters > 1:
PR = RSImage_ClusterPredictor(method=method,
classifier_rootDir=self.classifier_rootDir,
n_clusters=n_clusters,
classif_alg=classif_alg,
kNN_n_neighbors=kNN_n_neighbors)
else:
PR = RSImage_Predictor(method=method, classifier_rootDir=self.classifier_rootDir)
PR = RSImage_Predictor(method=method,
classifier_rootDir=self.classifier_rootDir)
######################
# get the classifier #
......@@ -1250,9 +1267,9 @@ class Classifier_Generator(object):
predicted = ML.predict(test_X) # returns 2D array (spectral samples x bands), e.g. 640 x 6
# NOTE: 'raw_values': RMSE is column-wise computed
# => yields the same result as one would compute the RMSE band by band
rmse = np.sqrt(mean_squared_error(test_Y, predicted, multioutput='raw_values'))
mae = mean_absolute_error(test_Y, predicted, multioutput='raw_values')
mape = mean_absolute_percentage_error(test_Y, predicted)
rmse = np.sqrt(mean_squared_error(test_Y, predicted, multioutput='raw_values')).astype(np.float32)
mae = mean_absolute_error(test_Y, predicted, multioutput='raw_values').astype(np.float32)
mape = mean_absolute_percentage_error(test_Y, predicted).astype(np.float32)
# predicted_train = ML.predict(train_X)
# rmse_train = np.sqrt(mean_squared_error(train_Y, predicted_train, multioutput='raw_values'))
......@@ -1268,6 +1285,16 @@ class Classifier_Generator(object):
ML.mae_per_band = list(mae)
ML.mape_per_band = list(mape)
# convert float64 attributes to float32 to save memory (affects <0.05% of homogenized pixels by 1 DN)
for attr in ['coef_', 'intercept_', 'singular_', '_residues']:
if isinstance(ML, Pipeline):
setattr(ML._final_estimator, attr, getattr(ML._final_estimator, attr).astype(np.float32))
else:
try:
setattr(ML, attr, getattr(ML, attr).astype(np.float32))
except AttributeError:
pass
return ML
def create_classifiers(self, outDir, method='LR', **kwargs):
......@@ -1725,27 +1752,39 @@ class SAM_Classifier(ImageClassifier):
class RSImage_ClusterPredictor(object):
"""Predictor class applying the predict() function of a machine learning classifier described by the given args."""
def __init__(self, method='LR', classifier_rootDir='', CPUs=1, classif_alg='kNN10'):
# type: (str, str) -> None
def __init__(self, method='LR', n_clusters=50, classif_alg='MinDist', kNN_n_neighbors=10, classifier_rootDir='',
CPUs=1):
# type: (str, int, str, int, str, int) -> None
"""Get an instance of RSImage_ClusterPredictor.
:param method: machine learning approach to be used for spectral bands prediction
'LR': Linear Regression
'RR': Ridge Regression
'QR': Quadratic Regression
:param n_clusters: Number of spectral clusters to be used during LR/ RR/ QR homogenization.
E.g., 50 means that the image to be converted to the spectral target sensor
is clustered into 50 spectral clusters and one separate machine learner per
cluster is applied to the input data to predict the homogenized image. If
'n_clusters' is set to 1, the source image is not clustered and
only one machine learning classifier is used for prediction.
:param classif_alg: algorithm to be used for image classification
(to define which cluster each pixel belongs to)
'MinDist': Minimum Distance (Nearest Centroid)
'kNN': k-nearest-neighbour
'SAM': spectral angle mapping
:param kNN_n_neighbors: The number of neighbors to be considered in case 'classif_alg' is set to
'kNN'. Otherwise, this parameter is ignored.
:param classifier_rootDir: root directory where machine learning classifiers are stored.
:param classif_alg: algorithm to be used for image classification (to define which cluster each pixel belongs to)
'MinDist': Minimum Distance (Nearest Centroid)
'kNN': k-nearest-neighbour
NOTE: The number of considered neighbors may be appended to 'kNN' like 'kNN5'
for 5 neighbors (default: 10).
"""
self.method = method
self.n_clusters = n_clusters
self.kNN_n_neighbors = kNN_n_neighbors
self.classifier_rootDir = os.path.abspath(classifier_rootDir)
self.classif_map = None
self.CPUs = CPUs
self.classif_alg = classif_alg
def get_classifier(self, src_satellite, src_sensor, src_LBA, tgt_satellite, tgt_sensor, tgt_LBA, n_clusters=50):
def get_classifier(self, src_satellite, src_sensor, src_LBA, tgt_satellite, tgt_sensor, tgt_LBA):
# type: (str, str, list, str, str, list) -> any
"""Select the correct machine learning classifier out of previously saved classifier collections.
......@@ -1756,12 +1795,12 @@ class RSImage_ClusterPredictor(object):
:param tgt_satellite: target satellite, e.g., 'Landsat-8'
:param tgt_sensor: target sensor, e.g., 'OLI_TIRS'
:param tgt_LBA: target LayerBandsAssignment
:param n_clusters: number of spectral clusters that the returned classifier has to contain
:return: classifier instance loaded from disk
"""
# fName_cls = get_classifier_filename(self.method, src_satellite, src_sensor, tgt_satellite, tgt_sensor)
fName_cls = get_filename_classifier_collection(self.method, src_satellite, src_sensor, n_clusters=n_clusters)
fName_cls = \
get_filename_classifier_collection(self.method, src_satellite, src_sensor, n_clusters=self.n_clusters)
path_cls = os.path.join(self.classifier_rootDir, fName_cls)
if not os.path.isfile(path_cls):
......@@ -1793,13 +1832,12 @@ class RSImage_ClusterPredictor(object):
:param nodataVal:
:param tiledims:
"""
if self.classif_alg.startswith('kNN'):
n_neighbors = int(self.classif_alg.split('kNN')[1]) if self.classif_alg.split('kNN')[1] else 1
if self.classif_alg == 'kNN':
clf = kNN_Classifier(
cluster_classifier.cluster_centers,
cluster_classifier.cluster_pixVals,
CPUs=self.CPUs,
n_neighbors=n_neighbors)
n_neighbors=self.kNN_n_neighbors)
elif self.classif_alg == 'MinDist':
clf = MinimumDistance_Classifier(
......@@ -1865,8 +1903,10 @@ class RSImage_ClusterPredictor(object):
# copy mask_nodata
image_predicted.mask_nodata = image.mask_nodata
# GeoArray(image_predicted).save('/home/gfz-fe/scheffler/temp/SPECHOM_py/image_predicted_clust50_MinDist_noB9.bsq')
# GeoArray(self.classif_map).save('/home/gfz-fe/scheffler/temp/SPECHOM_py/classif_map_clust50_MinDist_noB9.bsq')
# GeoArray(image_predicted).save(
# '/home/gfz-fe/scheffler/temp/SPECHOM_py/image_predicted_QRclust50_MinDist_noB9.bsq')
# GeoArray(self.classif_map).save(
# '/home/gfz-fe/scheffler/temp/SPECHOM_py/classif_map_QRclust50_MinDist_noB9.bsq')
return image_predicted
......@@ -1910,6 +1950,8 @@ class RSImage_ClusterPredictor(object):
# errors[im_predicted == im_predicted.nodata] = im_predicted.nodata
errors[im_predicted.mask_nodata.astype(np.int8) == 0] = im_predicted.nodata
# GeoArray(errors).save('/home/gfz-fe/scheffler/temp/SPECHOM_py/errors_QRclust50_MinDist_noB9.bsq')
return errors
......
......@@ -329,6 +329,9 @@ class JobConfig(object):
# L2B
self.exec_L2BP = gp('exec_L2BP')
self.spechomo_method = gp('spechomo_method')
self.spechomo_n_clusters = gp('spechomo_n_clusters')
self.spechomo_classif_alg = gp('spechomo_classif_alg')
self.spechomo_kNN_n_neighbors = gp('spechomo_kNN_n_neighbors')
self.spechomo_estimate_accuracy = gp('spechomo_estimate_accuracy')
self.spechomo_bandwise_accuracy = gp('spechomo_bandwise_accuracy')
......
......@@ -150,10 +150,23 @@
"write_output": true,
"delete_output": false,
"spechomo_method": "LR", /*Method used for spectral homogenization.
/*LI: Linear interpolation;
LI: Linear interpolation;
LR: Linear regression;
RR: Ridge regression;
QR: Quadratic regression*/
"spechomo_n_clusters": 50, /*Number of spectral clusters to be used during LR/ RR/ QR homogenization.
E.g., 50 means that the image to be converted to the spectral target sensor
is clustered into 50 spectral clusters and one separate machine learner per
cluster is applied to the input data to predict the homogenized image. If
'spechomo_n_clusters' is set to 1, the source image is not clustered and
only one machine learning classifier is used for prediction.*/
"spechomo_classif_alg": "MinDist", /*Multispectral classification algorithm to be used to determine the
spectral cluster each pixel belongs to.
MinDist: Minimum Distance (Nearest Centroid) Classification
kNN: k-Nearest-Neighbor Classification
SAM: Spectral Angle Mapping*/
"spechomo_kNN_n_neighbors": 10, /*The number of neighbors to be considered in case 'spechomo_classif_alg'
is set to 'kNN'. Otherwise, this parameter is ignored.*/
"spechomo_estimate_accuracy": false, /*whether to produce pixel- and bandwise information about estimation
accuracy of spectral homogenization
NOTE: only possible if 'spechomo_method' is not linear interpol.*/
......
......@@ -125,6 +125,9 @@ gms_schema_input = dict(
write_output=dict(type='boolean', required=False),
delete_output=dict(type='boolean', required=False),
spechomo_method=dict(type='string', required=False, allowed=['LI', 'LR', 'RR', 'QR']),
spechomo_n_clusters=dict(type='integer', required=False, allowed=[1, 5, 10, 15, 20, 30, 40, 50]),
spechomo_classif_alg=dict(type='string', required=False, allowed=['MinDist', 'kNN', 'SAM']),
spechomo_kNN_n_neighbors=dict(type='integer', required=False, min=0),
spechomo_estimate_accuracy=dict(type='boolean', required=False),
spechomo_bandwise_accuracy=dict(type='boolean', required=False),
)),
......@@ -264,6 +267,9 @@ parameter_mapping = dict(
# processors > L2B
exec_L2BP=('processors', 'L2B', ['run_processor', 'write_output', 'delete_output']),
spechomo_method=('processors', 'L2B', 'spechomo_method'),
spechomo_n_clusters=('processors', 'L2B', 'spechomo_n_clusters'),
spechomo_classif_alg=('processors', 'L2B', 'spechomo_classif_alg'),
spechomo_kNN_n_neighbors=('processors', 'L2B', 'spechomo_kNN_n_neighbors'),
spechomo_estimate_accuracy=('processors', 'L2B', 'spechomo_estimate_accuracy'),
spechomo_bandwise_accuracy=('processors', 'L2B', 'spechomo_bandwise_accuracy'),
......
......@@ -195,7 +195,7 @@ class Test_ClusterClassifier_Generator(unittest.TestCase):
CCG = ClusterClassifier_Generator([refcube_l8, refcube_l5])
CCG.create_classifiers(outDir=self.tmpOutdir.name, method='LR', n_clusters=5)
outpath_cls = os.path.join(self.tmpOutdir.name, 'LR__Landsat-8__OLI_TIRS__clust5.dill')
outpath_cls = os.path.join(self.tmpOutdir.name, 'LR_clust5__Landsat-8__OLI_TIRS.dill')
self.assertTrue(os.path.exists(outpath_cls))
with open(outpath_cls, 'rb') as inF:
......@@ -208,7 +208,7 @@ class Test_ClusterClassifier_Generator(unittest.TestCase):
CCG = ClusterClassifier_Generator([refcube_l8, refcube_l5])
CCG.create_classifiers(outDir=self.tmpOutdir.name, method='RR', n_clusters=5)
outpath_cls = os.path.join(self.tmpOutdir.name, 'RR_alpha1.0__Landsat-8__OLI_TIRS__clust5.dill')
outpath_cls = os.path.join(self.tmpOutdir.name, 'RR_alpha1.0_clust5__Landsat-8__OLI_TIRS.dill')
self.assertTrue(os.path.exists(outpath_cls))
with open(outpath_cls, 'rb') as inF:
......@@ -221,7 +221,7 @@ class Test_ClusterClassifier_Generator(unittest.TestCase):
CCG = ClusterClassifier_Generator([refcube_l8, refcube_l5])
CCG.create_classifiers(outDir=self.tmpOutdir.name, method='QR', n_clusters=5)
outpath_cls = os.path.join(self.tmpOutdir.name, 'QR__Landsat-8__OLI_TIRS__clust5.dill')
outpath_cls = os.path.join(self.tmpOutdir.name, 'QR_clust5__Landsat-8__OLI_TIRS.dill')
self.assertTrue(os.path.exists(outpath_cls))
with open(outpath_cls, 'rb') as inF:
......@@ -320,7 +320,10 @@ class Test_SpectralHomogenizer(unittest.TestCase):
def test_predict_by_machine_learner__QR_cluster_L8_S2(self):
"""Test quadratic regression in spectral clusters from Landsat-8 to Sentinel-2A."""
predarr, errors = self.SpH.predict_by_machine_learner(
self.testArr_L8, method='LRclust',
self.testArr_L8,
method='QR',
n_clusters=50,
classif_alg='MinDist',
src_satellite='Landsat-8', src_sensor='OLI_TIRS',
# src_LBA=['1', '2', '3', '4', '5', '6', '7'],
src_LBA=['1', '2', '3', '4', '5', '6', '7'],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment