Commit cd7f2151 authored by Daniel Scheffler
Browse files

Added drafts for revised prediction methods.

parent 942f268f
......@@ -1985,7 +1985,7 @@ class RSImage_ClusterPredictor(object):
image_predicted = GeoArray(np.empty((image.rows, image.cols, classifier.tgt_n_bands), dtype=image.dtype),
geotransform=image.gt, projection=image.prj, nodata=image.nodata)
for ((rS, rE), (cS, cE)), im_tile in image.tiles(tilesize=(1000, 1000)):
for ((rS, rE), (cS, cE)), im_tile in image.tiles(tilesize=(100, 100)):
print('Predicting tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
# 3D -> 2D
src_spectra = im2spectra(im_tile)
......@@ -2007,6 +2007,45 @@ class RSImage_ClusterPredictor(object):
return image_predicted
# def predict(self, image, classifier, nodataVal=None, CPUs=1):
# # type: (Union[np.ndarray, GeoArray], any, float, int) -> GeoArray
# """Apply the prediction function of the given classifier to the given remote sensing image.
#
# :param image: 3D array representing the input image
# :param classifier: the classifier instance
# :param nodataVal: no data value of the input image (ignored if image is a GeoArray with existing nodata value)
# :param CPUs: CPUs to use (default: 1)
# :return: 3D array representing the predicted spectral image cube
# """
# image = image if isinstance(image, GeoArray) else GeoArray(image, nodata=nodataVal)
# image.nodata = image.nodata if image.nodata is not None else nodataVal
#
# # assign each input pixel to a cluster (compute classification with cluster centers as endmembers)
# if not self.classif_map:
# self.classif_map = self.classify_image(image, classifier, method='kNN', nodataVal=nodataVal)
#
# # adjust classifier
# if CPUs is None or CPUs > 1:
# # FIXME does not work -> parallelize with https://github.com/ajtulloch/sklearn-compiledtrees?
# classifier.n_jobs = cpu_count() if CPUs is None else CPUs
#
# # apply prediction
# src_spectra = im2spectra(image)
# clusterPixVals1D = self.classif_map.flatten() # integer array
# spectra_pred = classifier.predict(src_spectra, clusterPixVals1D, nodataVal=nodataVal).astype(image.dtype)
# # 2D -> 3D
# image_predicted = GeoArray(spectra2im(spectra_pred, tgt_rows=image.shape[0], tgt_cols=image.shape[1]).astype(image.dtype),
# geotransform=image.gt, projection=image.prj, nodata=image.nodata)
#
# # re-apply nodata values to predicted result
# if image.nodata is not None:
# image_predicted[image.mask_nodata[:] == 0] = image.nodata
#
# # copy mask_nodata
# image_predicted.mask_nodata = image.mask_nodata
#
# return image_predicted
def compute_prediction_errors(self, im_predicted, cluster_classifier, nodataVal=None):
# type: (Union[np.ndarray, GeoArray], Cluster_Learner, float) -> np.ndarray
"""Compute errors that quantify prediction inaccurracy per band and per pixel.
......@@ -2069,6 +2108,85 @@ class RSImage_ClusterPredictor(object):
return errors
def compute_prediction_errors(self, im_predicted, cluster_classifier, nodataVal=None):
    # type: (Union[np.ndarray, GeoArray], Cluster_Learner, float) -> np.ndarray
    """Compute errors that quantify prediction inaccuracy per band and per pixel.

    Every pixel receives the per-band RMSE of the cluster it is assigned to in self.classif_map.

    :param im_predicted:        3D array representing the predicted image
    :param cluster_classifier:  instance of Cluster_Learner
    :param nodataVal:           no data value of the input image
                                (ignored if image is a GeoArray with existing nodata value);
                                cluster labels equal to this value are skipped
    :return:                    3D array representing prediction errors per band and pixel. It is allocated with
                                np.empty_like(im_predicted); the RMSE values are rounded and cast to int16 before
                                they are written into it.
    :raises ValueError:         if the number of RMSE values of any classifier differs from the number of image bands
    :raises RuntimeError:       if self.classif_map has not been generated yet
    """
    im_predicted = im_predicted if isinstance(im_predicted, GeoArray) else GeoArray(im_predicted, nodata=nodataVal)
    im_predicted.nodata = im_predicted.nodata if im_predicted.nodata is not None else nodataVal

    # im_predicted is a GeoArray at this point, so the band count is read once instead of re-wrapping the image
    # in a new GeoArray on every loop iteration
    n_bands = im_predicted.bands
    for cls in cluster_classifier:
        if len(cls.rmse_per_band) != n_bands:
            raise ValueError('The given classifier contains error statistics incompatible to the shape of the '
                             'image.')

    # explicit None check: truth-testing an array is ambiguous for more than one element and would wrongly
    # treat a single-element map containing label 0 as "not generated"
    if self.classif_map is None:
        raise RuntimeError('self.classif_map must be generated by running self.classify_image() beforehand.')

    errors = np.empty_like(im_predicted)

    # iterate over all cluster labels and copy rmse values
    # (np.unique already returns the labels sorted in ascending order)
    for pixVal in np.unique(self.classif_map):
        if pixVal == nodataVal:
            continue

        print('Inpainting error values for cluster #%s...' % pixVal)

        rmse_per_band_int = np.round(cluster_classifier.MLdict[pixVal].rmse_per_band, 0).astype(np.int16)
        errors[self.classif_map == pixVal] = rmse_per_band_int

    # TODO validate this equation
    # errors = (errors * im_predicted[:] / 10000).astype(errors.dtype)

    ##################################################################
    # NOTE: inactive alternative draft, kept for reference
    #
    # # 3D -> 2D
    # src_spectra = im2spectra(im_predicted)
    # clusterPixVals1D = self.classif_map.flatten()  # integer array
    #
    # # assign clusterlabel to each spectrum
    # src_df = DataFrame(src_spectra)
    # src_df.insert(0, 'cluster_label', clusterPixVals1D)
    #
    # # create target df
    # errs_df = DataFrame(columns=['cluster_label'] + cluster_classifier.tgt_LBA)
    # errs_df.cluster_label = clusterPixVals1D
    # if nodataVal:
    #     errs_df.fillna(nodataVal, inplace=True)
    #
    # # iterate over all cluster labels and apply corresponding machine learner parameters to predict target spectra
    # for pixVal in sorted(list(np.unique(clusterPixVals1D))):
    #     if pixVal == nodataVal:
    #         continue
    #
    #     classifier = cluster_classifier.MLdict[pixVal]
    #     src_spectra_sub = np.array(src_df[src_df.cluster_label == pixVal])[:, 1:]
    #
    #     # compute errors
    #     # TODO validate this equation
    #     # NOTE: 10000 is the BOA reflectance scaling factor
    #     errs_sub = (src_spectra_sub * classifier.rmse_per_band / 10000).astype(np.int16)
    #
    #     # append errors of current cluster at the correct position to errs_df
    #     errs_df.loc[src_df.cluster_label == pixVal, cluster_classifier.tgt_LBA] = errs_sub
    #
    # # get errors for all clusters
    # errors2D = np.array(errs_df.loc[:, cluster_classifier.tgt_LBA])
    #
    # # convert 2D errors array to image
    # errors = errors2D.reshape(im_predicted.shape[0], im_predicted.shape[1], errors2D.shape[1])

    # re-apply nodata values to predicted result
    if im_predicted.nodata is not None:
        # errors[im_predicted == im_predicted.nodata] = im_predicted.nodata
        errors[im_predicted.mask_nodata.astype(np.int8) == 0] = im_predicted.nodata

    return errors
# class GMS_Machine_Learner(LinearRegression, Ridge, Pipeline):
# def __init__(self, method, src_satellite, src_sensor, tgt_satellite, tgt_sensor, src_LBA, tgt_LBA):
......@@ -2189,20 +2307,23 @@ class Cluster_Learner(object):
:return:
"""
# assign clusterlabel to each spectrum
src_df = DataFrame(src_spectra)
src_df = DataFrame(src_spectra, columns=self.src_LBA)
src_df.insert(0, 'cluster_label', clusterPixVals1D)
# create target df
tgt_df = DataFrame(columns=['cluster_label'] + self.tgt_LBA)
tgt_df.cluster_label = clusterPixVals1D
if nodataVal:
tgt_df.fillna(nodataVal, inplace=True)
# tgt_df = DataFrame(columns=['cluster_label'] + self.tgt_LBA)
tgt_df = DataFrame(np.hstack([clusterPixVals1D.reshape(-1, 1), np.full((src_spectra.shape[0], self.tgt_n_bands), nodataVal)]),
columns=['cluster_label'] + self.tgt_LBA)
# tgt_df.cluster_label = clusterPixVals1D
# if nodataVal:
# tgt_df.fillna(nodataVal, inplace=True)
# iterate over all cluster labels and apply corresponding machine learner parameters to predict target spectra
for pixVal in sorted(list(np.unique(clusterPixVals1D))):
if pixVal == nodataVal:
continue
# print(pixVal)
classifier = self.MLdict[pixVal]
src_spectra_sub = np.array(src_df[src_df.cluster_label == pixVal])[:, 1:]
spectra_pred_sub = classifier.predict(src_spectra_sub).astype(src_spectra.dtype)
......@@ -2214,3 +2335,75 @@ class Cluster_Learner(object):
spectra_pred = np.array(tgt_df.loc[:, self.tgt_LBA])
return spectra_pred
# def predict(self, src_spectra, clusterPixVals1D, nodataVal=None):
# """
#
# :param src_spectra:
# :param clusterPixVals1D: classification map that assigns each image spectrum to its corresponding cluster
# -> must be a 1D np.ndarray with the same Y-dimension like src_spectra
# :param nodataVal:
# :return:
# """
# # assign clusterlabel to each spectrum
# src_df = DataFrame(src_spectra, columns=self.src_LBA)
# src_df.insert(0, 'cluster_label', clusterPixVals1D)
# # src_df.insert(1, 'classifier', np.nan)
# # src_df.classifier = list(src_df.cluster_label.map(lambda lbl: self.MLdict[lbl] if lbl != nodataVal else nodataVal))
#
# # create target df
# # tgt_df = DataFrame(columns=['cluster_label'] + self.tgt_LBA)
# # tgt_df.cluster_label = clusterPixVals1D
# # if nodataVal:
# # tgt_df.fillna(nodataVal, inplace=True)
# tgt_df = DataFrame(self.tgt_LBA)
# # tgt_df.cluster_label = clusterPixVals1D
# # if nodataVal:
# # tgt_df.fillna(nodataVal, inplace=True)
#
# # tgt_df[:, 1:] = np.array(src_df[:, 2:].map(lambda spectrum: ))
#
# # def predict(src_df_row):
# # # if src_df_row.cluster_label == nodataVal:
# # # return nodataVal
# # # else:
# # print(src_df_row.cluster_label)
# # return self.MLdict[src_df_row.cluster_label].predict(src_df_row[self.src_LBA].values.reshape(1, -1))
# # # return src_df_row['classifier'].predict(src_df_row[self.src_LBA].values.reshape(1, -1))
# #
# # tgt_df.loc[src_df.cluster_label != nodataVal, self.tgt_LBA] = \
# # src_df[src_df.cluster_label != nodataVal].apply(lambda src_df_row: predict(src_df_row), axis=1)
#
# @jit
# def predict(arr):
# # if src_df_row.cluster_label == nodataVal:
# # return nodataVal
# # else:
# # print(src_df_row.cluster_label)
# for i in range(arr.shape[0]):
# if arr[i, 0] == nodataVal:
# continue
# print(i)
# tgt_df[i] = self.MLdict[arr[i, 0]].predict(arr[i, 1:].reshape(1, -1))
#
# predict(src_df.values)
# # return self.MLdict[src_df_row.cluster_label].predict(src_df_row[self.src_LBA].values.reshape(1, -1))
# # return src_df_row['classifier'].predict(src_df_row[self.src_LBA].values.reshape(1, -1))
#
# # # iterate over all cluster labels and apply corresponding machine learner parameters to predict target spectra
# # for pixVal in sorted(list(np.unique(clusterPixVals1D))):
# # if pixVal == nodataVal:
# # continue
# #
# # classifier = self.MLdict[pixVal]
# # src_spectra_sub = np.array(src_df[src_df.cluster_label == pixVal])[:, 1:]
# # spectra_pred_sub = classifier.predict(src_spectra_sub).astype(src_spectra.dtype)
# #
# # # append predicted spectra at the correct position to tgt_df
# # tgt_df.loc[src_df.cluster_label == pixVal, self.tgt_LBA] = spectra_pred_sub
#
# # get predicted spectra for all clusters
# spectra_pred = np.array(tgt_df.loc[:, self.tgt_LBA])
#
# return spectra_pred
......@@ -274,9 +274,9 @@ class Test_SpectralHomogenizer(unittest.TestCase):
cfg = set_config(job_ID=26186196, db_host=db_host, reset_status=True, is_test=True)
cls.SpH = SpectralHomogenizer(classifier_rootDir=cfg.path_spechomo_classif)
# cls.testArr_L8 = GeoArray(np.random.randint(1, 10000, (50, 50, 7), dtype=np.int16)) # no band 9, no pan
cls.testArr_L8 = GeoArray(np.random.randint(1, 10000, (50, 50, 7), dtype=np.int16)) # no band 9, no pan
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B__250x250.bsq') # no pan
cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B.bsq') # no pan
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B.bsq') # no pan
# cls.cwl_L8 = [442.98, 482.59, 561.33, 654.61, 864.57, 1609.09, 2201.25]
cls.cwl_L8 = [442.98, 482.59, 561.33, 654.61, 864.57, 1373.48, 1609.09, 2201.25]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment