Commit 9c8a1b8f authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Merge branch 'master' into feature/add_accuracy_layers

parents fa96bee7 bfc787ae
......@@ -92,15 +92,16 @@ class L1A_object(GMS_object):
"Subset argument has be a list with 2 elements."
if subset:
assert subset[0] == 'block',\
"The function 'archive_to_rasObj' only supports block subsetting. Attemted to perform '%s' subsetting."\
% subset[0]
"The function 'archive_to_rasObj' only supports block subsetting. Attempted to perform '%s' " \
"subsetting." % subset[0]
self.logger.info('Reading %s %s %s image data...' % (self.satellite, self.sensor, self.subsystem))
gdal_path_archive = HLP_F.convert_absPathArchive_to_GDALvsiPath(path_archive)
project_dir = os.path.abspath(os.curdir)
os.chdir(os.path.dirname(path_archive))
files_in_archive = gdal.ReadDirRecursive(gdal_path_archive) # needs ~12sek for Landsat-8
assert files_in_archive, 'No files in archive for scene %s (entity ID: %s). ' % (self.scene_ID, self.entity_ID)
assert files_in_archive, 'No files in archive %s for scene %s (entity ID: %s). ' \
% (os.path.basename(path_archive), self.scene_ID, self.entity_ID)
full_LayerBandsAssignment = META.get_LayerBandsAssignment(self.GMS_identifier, no_thermal=False, no_pan=False)
####################################################
......
......@@ -90,23 +90,23 @@ class L2B_object(L2A_object):
if method == 'LI' or CFG.datasetid_spectral_ref is None:
# linear interpolation (if intended by user or in case of custom spectral characteristics of target sensor)
# -> no classifier for that case available -> linear interpolation
outArr = SpH.interpolate_cube(self.arr, src_cwls, tgt_cwls, kind='linear')
im, errs = SpH.interpolate_cube(self.arr, src_cwls, tgt_cwls, kind='linear')
else:
# a known sensor has been specified as spectral reference => apply a machine learner
outArr = SpH.predict_by_machine_learner(self.arr,
method=method,
src_satellite=self.satellite,
src_sensor=self.sensor,
src_LBA=self.LayerBandsAssignment,
tgt_satellite=tgt_sat,
tgt_sensor=tgt_sen,
tgt_LBA=tgt_LBA,
nodataVal=self.arr.nodata,
fallback_argskwargs=dict(
args=dict(source_CWLs=src_cwls, target_CWLs=tgt_cwls,),
kwargs=dict(kind='linear')
))
im, errs = SpH.predict_by_machine_learner(self.arr,
method=method,
src_satellite=self.satellite,
src_sensor=self.sensor,
src_LBA=self.LayerBandsAssignment,
tgt_satellite=tgt_sat,
tgt_sensor=tgt_sen,
tgt_LBA=tgt_LBA,
nodataVal=self.arr.nodata,
fallback_argskwargs=dict(
args=dict(source_CWLs=src_cwls, target_CWLs=tgt_cwls,),
kwargs=dict(kind='linear')
))
###################
# update metadata #
......@@ -117,7 +117,8 @@ class L2B_object(L2A_object):
if 'band names' in self.meta_odict: # FIXME bug workaround
del self.meta_odict['band names'] # TODO
self.arr = outArr
self.arr = im # type: GeoArray
self.spec_homo_errors = errs # type: np.ndarray # int16
class SpectralHomogenizer(object):
......@@ -160,7 +161,7 @@ class SpectralHomogenizer(object):
def predict_by_machine_learner(self, arrcube, method, src_satellite, src_sensor, src_LBA,
tgt_satellite, tgt_sensor, tgt_LBA, nodataVal=None, **fallback_argskwargs):
# type: (Union[np.ndarray, GeoArray], str, str, str, list, str, str, list, int, dict) -> np.ndarray
# type: (Union[np.ndarray, GeoArray], str, str, str, list, str, str, list, int, dict) -> tuple
"""Predict spectral bands of target sensor by applying a machine learning approach.
:param arrcube: input image array for target sensor spectral band prediction (rows x cols x bands)
......@@ -176,6 +177,7 @@ class SpectralHomogenizer(object):
:param nodataVal: no data value
:param fallback_argskwargs: arguments and keyword arguments for fallback algorithm ({'args':{}, 'kwargs': {}}
:return: predicted array (rows x columns x bands)
:rtype: Tuple[np.ndarray, np.ndarray]
"""
# TODO: add LBA validation to .predict()
PR = RSImage_Predictor(method=method, classifier_rootDir=self.classifier_rootDir)
......@@ -188,11 +190,13 @@ class SpectralHomogenizer(object):
exc = Exception()
try:
cls = PR.get_classifier(src_satellite, src_sensor, src_LBA, tgt_satellite, tgt_sensor, tgt_LBA)
except FileNotFoundError as e:
self.logger.warning('No machine learning classifier available that fulfills the specifications of the '
'spectral reference sensor. Falling back to linear interpolation for performing '
'spectral homogenization.')
exc = e
except ClassifierNotAvailableError as e:
self.logger.error('\nAn error occurred during spectral homogenization using machine learning. '
'Falling back to linear interpolation. Error message was: ')
......@@ -206,14 +210,18 @@ class SpectralHomogenizer(object):
if cls:
self.logger.info('Performing spectral homogenization using %s. Target is %s %s %s.'
% (method, tgt_satellite, tgt_sensor, tgt_LBA))
outarr = PR.predict(arrcube, classifier=cls, nodataVal=nodataVal)
im_homo = PR.predict(arrcube, classifier=cls, nodataVal=nodataVal)
errors = PR.compute_prediction_errors(im_homo, cls, nodataVal=nodataVal)
elif fallback_argskwargs:
# fallback
outarr = self.interpolate_cube(arrcube, *fallback_argskwargs['args'], **fallback_argskwargs['kwargs'])
# fallback: use linear interpolation and set errors to an array of zeros
im_homo = self.interpolate_cube(arrcube, *fallback_argskwargs['args'], **fallback_argskwargs['kwargs'])
errors = np.zeros_like(im_homo, dtype=np.int16)
else:
raise exc
return outarr
return im_homo, errors
class SpectralResampler(object):
......@@ -365,7 +373,7 @@ class SpectralResampler(object):
class KMeansRSImage(object):
"""Class for clustering a giveb input image by using K-Means algorithm."""
"""Class for clustering a given input image by using K-Means algorithm."""
def __init__(self, im, n_clusters, CPUs=1, v=False):
# type: (GeoArray, int) -> None
......@@ -465,12 +473,13 @@ class KMeansRSImage(object):
plt.show()
def get_random_spectra_from_each_cluster(self, samplesize=50, src_im=None):
# type: (int) -> dict
# type: (int, GeoArray) -> dict
"""Returns a given number of spectra randomly selected within each cluster.
E.g., 50 spectra of belonging to cluster 1, 50 spectra of belonging to cluster 2 and so on.
:param samplesize: number of spectra to be randomly selected from each cluster
:param src_im: image to get random samples from (default: self.im)
:return:
"""
src_im = src_im if src_im is not None else self.im
......@@ -490,7 +499,7 @@ class KMeansRSImage(object):
class TrainingData(object):
"""Class for analyzing statistical relations between a pair of machine leaning training data cubes."""
"""Class for analyzing statistical relations between a pair of machine learning training data cubes."""
def __init__(self, im_X, im_Y, test_size):
# type: (Union[GeoArray, np.ndarray], Union[GeoArray, np.ndarray], Union[float, int]) -> None
"""Get instance of TrainingData.
......@@ -818,7 +827,8 @@ class ReferenceCube_Generator(object):
# type: (List[str], List[Tuple[str, str]], str, int, int, bool, logging.Logger, Union[None, int]) -> None
"""Initialize ReferenceCube_Generator.
:param filelist_refs: list of (hyperspectral) reference images
:param filelist_refs: list of (hyperspectral) reference images,
representing BOA reflectance, scaled between 0 and 10000
:param tgt_sat_sen_list: list satellite/sensor tuples containing those sensors for which the reference cube
is to be computed, e.g. [('Landsat-8', 'OLI_TIRS',), ('Landsat-5', 'TM')]
:param dir_refcubes: output directory for the generated reference cube
......@@ -1012,7 +1022,7 @@ class ReferenceCube_Generator(object):
return random_samples
def resample_spectra(self, spectra, src_cwl, tgt_srf):
# type: (Union[GeoArray, np.ndarray], Union[list, np.array], SRF, bool) -> np.ndarray
# type: (Union[GeoArray, np.ndarray], Union[list, np.array], SRF) -> np.ndarray
"""Perform spectral resampling of the given image to match the given spectral response functions.
:param spectra: 2D array (rows: spectral samples; columns: spectral information / bands
......@@ -1074,7 +1084,6 @@ class RefCube(object):
:param sensor: the sensor for which the reference cube holds its spectral data
:param LayerBandsAssignment: the LayerBandsAssignment for which the reference cube holds its spectral data
"""
# type: (str, str, str, list) -> None
# privates
self._col_imName_dict = dict()
......@@ -1292,6 +1301,8 @@ class Classifier_Generator(object):
for src_LBA in src_derived_LBAs:
for tgt_LBA in tgt_derived_LBAs:
# Get training data for source and target image according to the given LayerBandsAssignments
# e.g., source: Landsat 7 image in LBA 1__2__3__4__5__7 and target L8 in 1__2__3__4__5__6__7
src_data = src_cube.get_band_combination(src_LBA)
tgt_data = tgt_cube.get_band_combination(tgt_LBA)
......@@ -1306,8 +1317,31 @@ class Classifier_Generator(object):
ML = specHomoApproaches[method]()
ML.fit(train_X, train_Y, *args, **kwargs)
def mean_absolute_percentage_error(y_true, y_pred):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true), axis=0) * 100
# compute RMSE, MAE, MAPE
from sklearn.metrics import mean_squared_error, mean_absolute_error
predicted = ML.predict(test_X) # returns 2D array (spectral samples x bands), e.g. 640 x 6
# NOTE: 'raw_values': RMSE is column-wise computed
# => yields the same result as one would compute the RMSE band by band
rmse = np.sqrt(mean_squared_error(test_Y, predicted, multioutput='raw_values'))
mae = mean_absolute_error(test_Y, predicted, multioutput='raw_values')
mape = mean_absolute_percentage_error(test_Y, predicted)
# predicted_train = ML.predict(train_X)
# rmse_train = np.sqrt(mean_squared_error(train_Y, predicted_train, multioutput='raw_values'))
# # v2
# test_Y_im = spectra2im(test_Y, tgt_rows=int(tgt_data.rows * 0.4), tgt_cols=tgt_data.cols)
# pred_im = spectra2im(predicted, tgt_rows=int(tgt_data.rows * 0.4), tgt_cols=tgt_data.cols)
# rmse_v2 = np.sqrt(mean_squared_error(test_Y_im[:,:,1], pred_im[:,:,1]))
# append some metadata
ML.scores = dict(train=ML.score(train_X, train_Y), test=ML.score(test_X, test_Y))
ML.scores = dict(train=ML.score(train_X, train_Y), test=ML.score(test_X, test_Y)) # r2 scores
ML.rmse_per_band = list(rmse)
ML.mae_per_band = list(mae)
ML.mape_per_band = list(mape)
ML.src_satellite = src_cube.satellite
ML.src_sensor = src_cube.sensor
ML.tgt_satellite = tgt_cube.satellite
......@@ -1345,6 +1379,11 @@ class ClassifierCollection(object):
self.content = dill.load(inF)
def __repr__(self):
"""Returns representation of ClassifierCollection.
:return: e.g., "{'1__2__3__4__5__7': {('Landsat-5', 'TM'): {'1__2__3__4__5__7':
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)}, ..."
"""
return pformat(self.content)
def __getitem__(self, item):
......@@ -1354,6 +1393,7 @@ class ClassifierCollection(object):
class RSImage_Predictor(object):
"""Predictor class applying the predict() function of a machine learning classifier described be the given args."""
def __init__(self, method='LR', classifier_rootDir=''):
# type: (str, str) -> None
"""Get an instance of RSImage_Predictor.
:param method: machine learning approach to be used for spectral bands prediction
......@@ -1361,7 +1401,6 @@ class RSImage_Predictor(object):
'RR': Ridge Regression
:param classifier_rootDir: root directory where machine learning classifiers are stored.
"""
# type (str, str) -> None
self.method = method
self.classifier_rootDir = os.path.abspath(classifier_rootDir)
......@@ -1403,28 +1442,62 @@ class RSImage_Predictor(object):
@staticmethod
def predict(image, classifier, nodataVal=None, CPUs=1):
# type: (Union[np.ndarray, GeoArray], any, float, int) -> GeoArray
"""Apply the prediction function of the given specifier to the given remote sensing image.
:param image: 3D array representing the input image
:param classifier: the classifier instance
:param nodataVal: no data value of the input image
:param nodataVal: no data value of the input image (ignored if image is a GeoArray with existing nodata value)
:param CPUs: CPUs to use (default: 1)
:return: 3D array representing the predicted spectral image cube
"""
image = image if isinstance(image, GeoArray) else GeoArray(image, nodata=nodataVal)
image.nodata = image.nodata if image.nodata is not None else nodataVal
#
spectra = im2spectra(image)
# adjust classifier
if CPUs is None or CPUs > 1:
classifier.n_jobs = cpu_count() if CPUs is None else CPUs
# predict!
spectra_predicted = classifier.predict(spectra).astype(image.dtype)
# 2D -> 3D
image_predicted = spectra2im(spectra_predicted, tgt_rows=image.shape[0], tgt_cols=image.shape[1])
image_predicted = GeoArray(spectra2im(spectra_predicted, tgt_rows=image.shape[0], tgt_cols=image.shape[1]))
# re-apply nodata values to predicted result
if nodataVal is not None:
im_Y_gA = GeoArray(image, nodata=nodataVal)
image_predicted[im_Y_gA.mask_nodata[:] == 0] = nodataVal
if image.nodata is not None:
image_predicted[image.mask_nodata[:] == 0] = image.nodata
return image_predicted
@staticmethod
def compute_prediction_errors(im_predicted, classifier, nodataVal=None):
# type: (Union[np.ndarray, GeoArray], any, float) -> np.ndarray
"""Compute errors that quantify prediction inaccurracy per band and per pixel.
:param im_predicted: 3D array representing the predicted image
:param classifier: the classifier instance
:param nodataVal: no data value of the input image
(ignored if image is a GeoArray with existing nodata value)
:return: 3D array (int16) representing prediction errors per band and pixel
"""
im_predicted = im_predicted if isinstance(im_predicted, GeoArray) else GeoArray(im_predicted, nodata=nodataVal)
im_predicted.nodata = im_predicted.nodata if im_predicted.nodata is not None else nodataVal
if not len(classifier.rmse_per_band) == GeoArray(im_predicted).bands:
raise ValueError('The given classifier contains error statistics incompatible to the shape of the image.')
# compute errors
# TODO validate this equation
# NOTE: 10000 is the BOA reflectance scaling factor
errors = (im_predicted[:] * classifier.rmse_per_band / 10000).astype(np.int16)
# re-apply nodata values to predicted result
if im_predicted.nodata is not None:
errors[im_predicted.mask_nodata[:] == 0] = im_predicted.nodata
return errors
......@@ -44,6 +44,7 @@ class Dataset(object):
self._pathGen = None
self._ac_options = {}
self._ac_errors = None
self._spec_homo_errors = None
# defaults
self.proc_level = 'init'
......@@ -413,6 +414,38 @@ class Dataset(object):
def ac_errors(self):
self._ac_errors = None
@property
def spec_homo_errors(self):
"""Returns an instance of GeoArray containing error information calculated during spectral homogenization.
:return:
"""
return self._spec_homo_errors # FIXME should give a warning if None
@spec_homo_errors.setter
def spec_homo_errors(self, *geoArr_initArgs):
if geoArr_initArgs[0] is not None:
errArr = GeoArray(*geoArr_initArgs)
assert errArr.shape == self.arr.shape, "The 'spec_homo_errors' GeoArray can only be instanced with an " \
"array of the same dimensions like GMS_obj.arr. Got %s." \
% str(errArr.shape)
if errArr._nodata is None:
errArr.nodata = DEF_D.get_outFillZeroSaturated(errArr.dtype)[0]
errArr.gt = self.arr.gt
errArr.prj = self.arr.prj
errArr.bandnames = self.LBA2bandnames(self.LayerBandsAssignment)
self._spec_homo_errors = errArr
else:
del self.ac_errors
@spec_homo_errors.deleter
def spec_homo_errors(self):
self._spec_homo_errors = None
@property
def subset(self):
return [self.arr_shape, self.arr_pos]
......
ENVI
description = {
/home/gfz-fe/scheffler/python/gms_preprocessing/tests/data/spechomo_inputs/refcube__Landsat-5__TM__nclust50__nsamp100.bsq}
samples = 16
lines = 100
bands = 6
header offset = 0
file type = ENVI Standard
data type = 4
interleave = bsq
byte order = 0
map info = {Arbitrary, 1, 1, 0, 0, 1, 1, 0, North}
band names = {
Band 1,
Band 2,
Band 3,
Band 4,
Band 5,
Band 6}
{
"satellite": "Landsat-5",
"sensor": "TM",
"filepath": "/home/gfz-fe/scheffler/temp/SPECHOM_py/CUBE/refcube__Landsat-5__TM__nclust50__nsamp20000.bsq",
"n_signatures": 100,
"n_images": 16,
"col_imName_dict": {
"0": "Alps_snow_rocks_Apex",
"1": "Balaton_lake_clouds_rural_Apex",
"2": "Balaton_lake_clouds_rural_Apex_CLOUDfree",
"3": "Bavaria_farmland_LMU_Hyspex",
"4": "Bruessels_urban_forest",
"5": "Coral_French_Frigate_Shoals_Hawaii_Aviris",
"6": "Coral_French_Frigate_Shoals_Hawaii_Aviris_CLOUDfree",
"7": "Costa_rica_Hymap",
"8": "Costa_rica_Hymap_CLOUDfree",
"9": "Goodsprings_nevada_desert_Aviris",
"10": "Mullewa_Hymap",
"11": "Plumas_national_forest_CA_Aviris",
"12": "SouthAfrica_Arundale_bbr3_selsmo_Hymap",
"13": "canada_arctic_derek_Hymap",
"14": "isabena_spain_Hyspex",
"15": "karlsruhe_forest_urban_Hymap"
},
"LayerBandsAssignment": [
"1",
"2",
"3",
"4",
"5",
"7"
]
}
\ No newline at end of file
ENVI
description = {
/home/gfz-fe/scheffler/python/gms_preprocessing/tests/data/spechomo_inputs/refcube__Landsat-8__OLI_TIRS__nclust50__nsamp100.bsq}
samples = 16
lines = 100
bands = 9
header offset = 0
file type = ENVI Standard
data type = 4
interleave = bsq
byte order = 0
map info = {Arbitrary, 1, 1, 0, 0, 1, 1, 0, North}
band names = {
Band 1,
Band 2,
Band 3,
Band 4,
Band 5,
Band 6,
Band 7,
Band 8,
Band 9}
{
"satellite": "Landsat-8",
"sensor": "OLI_TIRS",
"filepath": "/home/gfz-fe/scheffler/temp/SPECHOM_py/CUBE/refcube__Landsat-8__OLI_TIRS__nclust50__nsamp20000.bsq",
"n_signatures": 100,
"n_images": 16,
"col_imName_dict": {
"0": "Alps_snow_rocks_Apex",
"1": "Balaton_lake_clouds_rural_Apex",
"2": "Balaton_lake_clouds_rural_Apex_CLOUDfree",
"3": "Bavaria_farmland_LMU_Hyspex",
"4": "Bruessels_urban_forest",
"5": "Coral_French_Frigate_Shoals_Hawaii_Aviris",
"6": "Coral_French_Frigate_Shoals_Hawaii_Aviris_CLOUDfree",
"7": "Costa_rica_Hymap",
"8": "Costa_rica_Hymap_CLOUDfree",
"9": "Goodsprings_nevada_desert_Aviris",
"10": "Mullewa_Hymap",
"11": "Plumas_national_forest_CA_Aviris",
"12": "SouthAfrica_Arundale_bbr3_selsmo_Hymap",
"13": "canada_arctic_derek_Hymap",
"14": "isabena_spain_Hyspex",
"15": "karlsruhe_forest_urban_Hymap"
},
"LayerBandsAssignment": [
"1",
"2",
"3",
"4",
"5",
"9",
"6",
"7",
"8"
]
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment