Commit 7d756443 authored by Daniel Scheffler
Browse files

Removed classes 'Classifier_Generator' and 'RSImage_Predictor'. Homogenization...

Removed classes 'Classifier_Generator' and 'RSImage_Predictor'. Homogenization without clustering is now done with RSImage_ClusterPredictor with n_clusters=1.
Removed corresponding tests.
Updated classifier database.
parent dacc2fd2
......@@ -23,6 +23,9 @@ import dill
from pprint import pformat
from nested_dict import nested_dict
import traceback
import zipfile
import tempfile
import time
from sklearn.cluster import k_means_ # noqa F401 # flake8 issue
from sklearn.model_selection import train_test_split
......@@ -1189,7 +1192,7 @@ class RefCube(object):
json.dump(meta2write, metaF, separators=(',', ': '), indent=4)
class Classifier_Generator(object):
class ClusterClassifier_Generator(object):
"""Class for creating collections of machine learning classifiers that can be used for spectral homogenization."""
def __init__(self, list_refcubes):
# type: (List[Union[str, RefCube]]) -> None
......@@ -1296,67 +1299,6 @@ class Classifier_Generator(object):
return ML
def create_classifiers(self, outDir, method='LR', **kwargs):
    # type: (str, str, dict) -> None
    """Create classifiers for all combinations of the reference cubes given in __init__().

    For every source cube one classifier collection is built and dumped (via dill) to a file in outDir.
    The collection is indexed as [joined source LBA][(target satellite, target sensor)][joined target LBA].

    :param outDir:  output directory for the created classifier collections
    :param method:  type of machine learning classifiers to be included in classifier collections
                    'LR': Linear Regression
                    'RR': Ridge Regression
                    'QR': Quadratic Regression
    :param kwargs:  keyword arguments to be passed to the __init__() function of machine learners
    """
    for src_cube in self.refcubes:
        cls_collection = nested_dict()
        fName_cls = get_filename_classifier_collection(method, src_cube.satellite, src_cube.sensor)

        # The derived source LBAs only depend on the source cube, so compute them once per source cube.
        # list() makes the result safely re-iterable for every target cube.
        src_derived_LBAs = list(self._get_derived_LayerBandsAssignments(src_cube.satellite, src_cube.sensor))

        for tgt_cube in self.refcubes:
            # a sensor is never homogenized to itself
            if (src_cube.satellite, src_cube.sensor) == (tgt_cube.satellite, tgt_cube.sensor):
                continue

            print("Creating %s classifier to predict %s %s from %s %s..."
                  % (method, tgt_cube.satellite, tgt_cube.sensor, src_cube.satellite, src_cube.sensor))

            tgt_derived_LBAs = self._get_derived_LayerBandsAssignments(tgt_cube.satellite, tgt_cube.sensor)

            for src_LBA in src_derived_LBAs:
                # Get the source training data according to the given LayerBandsAssignment,
                # e.g., a Landsat 7 image in LBA 1__2__3__4__5__7.
                # This is independent of the target LBA and therefore done outside of the inner loop.
                src_spectra = im2spectra(src_cube.get_band_combination(src_LBA))

                for tgt_LBA in tgt_derived_LBAs:
                    # Get the target training data, e.g., Landsat 8 in LBA 1__2__3__4__5__6__7
                    tgt_spectra = im2spectra(tgt_cube.get_band_combination(tgt_LBA))

                    # Set train and test variables
                    # NOTE: If random_state is set to an Integer,
                    #       train_test_split will always select the same 'pseudo-random' set of the input data.
                    train_X, test_X, train_Y, test_Y = \
                        train_test_split(src_spectra, tgt_spectra,
                                         test_size=0.4, shuffle=True, random_state=0)

                    # train the learner
                    ML = self.train_machine_learner(train_X, train_Y, test_X, test_Y, method, **kwargs)

                    # add some metadata
                    ML.src_satellite = src_cube.satellite
                    ML.src_sensor = src_cube.sensor
                    ML.tgt_satellite = tgt_cube.satellite
                    ML.tgt_sensor = tgt_cube.sensor
                    ML.src_LBA = src_LBA
                    ML.tgt_LBA = tgt_LBA
                    ML.src_n_bands = len(ML.src_LBA)
                    ML.tgt_n_bands = len(ML.tgt_LBA)

                    # append to classifier collection
                    cls_collection['__'.join(src_LBA)][tgt_cube.satellite, tgt_cube.sensor]['__'.join(tgt_LBA)] = ML

        # dump the collection of the current source cube to disk
        with open(os.path.join(outDir, fName_cls), 'wb') as outF:
            dill.dump(cls_collection.to_dict(), outF)
class ClusterClassifier_Generator(Classifier_Generator):
def cluster_refcube_spectra(self, n_clusters=50, CPUs=24):
# type: (int, int) -> np.ndarray
"""Generate cluster labels for the reference cubes passed to __init__().
......@@ -1496,14 +1438,11 @@ def get_machine_learner(method='LR', **init_params):
# '__to__' + '__'.join([tgt_satellite, tgt_sensor, tgt_LBA_name]) + '.dill'
# Build the file name ('<method...>__<src_satellite>__<src_sensor>.dill') of a classifier collection.
# NOTE(review): this span is a diff hunk rendered without +/- markers, so two variants of the function are
# interleaved here (two `def` lines differing in the n_clusters default and two `return` lines) -
# confirm against the repository which variant is current before reusing this code.
def get_filename_classifier_collection(method, src_satellite, src_sensor, n_clusters=0):
def get_filename_classifier_collection(method, src_satellite, src_sensor, n_clusters=1):
# Ridge Regression collections carry the regularization strength in their file name
if method == 'RR':
method += '_alpha1.0' # TODO add to config
# variant 1: the cluster suffix is only appended for a truthy n_clusters
if n_clusters:
method += '_clust%s' % n_clusters
return '__'.join([method, src_satellite, src_sensor]) + '.dill'
# variant 2: the cluster suffix is always part of the file name
return '__'.join(['%s_clust%s' % (method, n_clusters), src_satellite, src_sensor]) + '.dill'
class ClassifierCollection(object):
......@@ -1523,137 +1462,6 @@ class ClassifierCollection(object):
return self.content[item]
class RSImage_Predictor(object):
    """Predictor class applying the predict() function of a machine learning classifier described by the given args."""

    def __init__(self, method='LR', classifier_rootDir=''):
        # type: (str, str) -> None
        """Get an instance of RSImage_Predictor.

        :param method:              machine learning approach to be used for spectral bands prediction
                                    'LR': Linear Regression
                                    'RR': Ridge Regression
        :param classifier_rootDir:  root directory where machine learning classifiers are stored.
        """
        self.method = method
        self.classifier_rootDir = os.path.abspath(classifier_rootDir)

    def get_classifier(self, src_satellite, src_sensor, src_LBA, tgt_satellite, tgt_sensor, tgt_LBA):
        # type: (str, str, list, str, str, list) -> any
        """Select the correct machine learning classifier out of previously saved classifier collections.

        Describe the classifier specifications with the given arguments.

        :param src_satellite:   source satellite, e.g., 'Landsat-8'
        :param src_sensor:      source sensor, e.g., 'OLI_TIRS'
        :param src_LBA:         source LayerBandsAssignment
        :param tgt_satellite:   target satellite, e.g., 'Landsat-8'
        :param tgt_sensor:      target sensor, e.g., 'OLI_TIRS'
        :param tgt_LBA:         target LayerBandsAssignment
        :return:                classifier instance loaded from disk
        :raises FileNotFoundError:              if there is no classifier collection file for the source sensor
        :raises ClassifierNotAvailableError:    if the collection has no entry for the requested combination
        :raises ValueError:                     if the loaded object is not a machine learner of the expected type
        """
        fName_cls = get_filename_classifier_collection(self.method, src_satellite, src_sensor)
        path_cls = os.path.join(self.classifier_rootDir, fName_cls)

        if not os.path.isfile(path_cls):
            raise FileNotFoundError('No classifier available for the given specification at %s.' % path_cls)

        # the collection is indexed as [joined source LBA][(target satellite, target sensor)][joined target LBA]
        try:
            ML_instance = \
                ClassifierCollection(path_cls)['__'.join(src_LBA)][tgt_satellite, tgt_sensor]['__'.join(tgt_LBA)]
        except KeyError:
            raise ClassifierNotAvailableError(self.method, src_satellite, src_sensor, src_LBA,
                                              tgt_satellite, tgt_sensor, tgt_LBA)

        # validation
        expected_type = type(get_machine_learner(self.method))
        if not isinstance(ML_instance, expected_type):
            raise ValueError('The given dillFile %s does not contain an instance of %s but %s.'
                             % (os.path.basename(fName_cls), expected_type.__name__, type(ML_instance)))

        return ML_instance

    @staticmethod
    def predict(image, classifier, nodataVal=None, CPUs=1):
        # type: (Union[np.ndarray, GeoArray], any, float, int) -> GeoArray
        """Apply the prediction function of the given classifier to the given remote sensing image.

        :param image:       3D array representing the input image
        :param classifier:  the classifier instance
        :param nodataVal:   no data value of the input image (ignored if image is a GeoArray with existing nodata value)
        :param CPUs:        CPUs to use (default: 1)
        :return:            3D array representing the predicted spectral image cube
        """
        from time import time

        image = image if isinstance(image, GeoArray) else GeoArray(image, nodata=nodataVal)
        image.nodata = image.nodata if image.nodata is not None else nodataVal

        # adjust classifier
        if CPUs is None or CPUs > 1:
            # FIXME does not work -> parallelize with https://github.com/ajtulloch/sklearn-compiledtrees?
            classifier.n_jobs = cpu_count() if CPUs is None else CPUs

        # apply prediction
        # NOTE: prediction is applied in 1000 x 1000 tiles to save memory (because classifier.predict returns float32)
        image_predicted = GeoArray(np.empty((image.rows, image.cols, classifier.tgt_n_bands), dtype=image.dtype),
                                   geotransform=image.gt, projection=image.prj, nodata=image.nodata)

        t0 = time()
        for ((rS, rE), (cS, cE)), im_tile in image.tiles(tilesize=(1000, 1000)):
            # 3D -> 2D
            spectra = im2spectra(im_tile)

            # predict!
            spectra_pred = classifier.predict(spectra).astype(image.dtype)

            # 2D -> 3D
            tiledata_pred = spectra2im(spectra_pred, tgt_rows=im_tile.shape[0], tgt_cols=im_tile.shape[1])

            # the tile bounds are used as inclusive end indices here, hence the '+ 1'
            image_predicted[rS:rE + 1, cS:cE + 1] = tiledata_pred
        print(time() - t0)

        # re-apply nodata values to predicted result
        if image.nodata is not None:
            image_predicted[image.mask_nodata[:] == 0] = image.nodata

        # copy mask_nodata
        image_predicted.mask_nodata = image.mask_nodata

        # NOTE: The result is intentionally not written to disk here. Saving is up to the caller
        #       (a previous revision dumped it to a hard-coded, developer-specific path).
        return image_predicted

    @staticmethod
    def compute_prediction_errors(im_predicted, classifier, nodataVal=None):
        # type: (Union[np.ndarray, GeoArray], any, float) -> np.ndarray
        """Compute errors that quantify prediction inaccuracy per band and per pixel.

        :param im_predicted:    3D array representing the predicted image
        :param classifier:      the classifier instance
        :param nodataVal:       no data value of the input image
                                (ignored if image is a GeoArray with existing nodata value)
        :return:                3D array (int16) representing prediction errors per band and pixel
        :raises ValueError:     if the number of per-band RMSE values of the classifier does not match the
                                number of image bands
        """
        im_predicted = im_predicted if isinstance(im_predicted, GeoArray) else GeoArray(im_predicted, nodata=nodataVal)
        im_predicted.nodata = im_predicted.nodata if im_predicted.nodata is not None else nodataVal

        if len(classifier.rmse_per_band) != im_predicted.bands:
            raise ValueError('The given classifier contains error statistics incompatible to the shape of the image.')

        # compute errors
        # TODO validate this equation
        # NOTE: 10000 is the BOA reflectance scaling factor
        errors = (im_predicted[:] * classifier.rmse_per_band / 10000).astype(np.int16)

        # re-apply nodata values to predicted result
        if im_predicted.nodata is not None:
            errors[im_predicted.mask_nodata[:] == 0] = im_predicted.nodata

        # NOTE: The errors are intentionally not written to disk here. Saving is up to the caller.
        return errors
class RSImage_ClusterPredictor(object):
"""Predictor class applying the predict() function of a machine learning classifier described by the given args."""
def __init__(self, method='LR', n_clusters=50, classif_alg='MinDist', kNN_n_neighbors=10, classifier_rootDir='',
......@@ -1701,31 +1509,35 @@ class RSImage_ClusterPredictor(object):
:param tgt_LBA: target LayerBandsAssignment
:return: classifier instance loaded from disk
"""
# fName_cls = get_classifier_filename(self.method, src_satellite, src_sensor, tgt_satellite, tgt_sensor)
fName_cls = \
get_filename_classifier_collection(self.method, src_satellite, src_sensor, n_clusters=self.n_clusters)
path_cls = os.path.join(self.classifier_rootDir, fName_cls)
if not os.path.isfile(path_cls):
raise FileNotFoundError('No classifier available for the given specification at %s.' % path_cls)
try:
dict_clust_MLinstances = \
ClassifierCollection(path_cls)['__'.join(src_LBA)][tgt_satellite, tgt_sensor]['__'.join(tgt_LBA)]
except KeyError:
raise ClassifierNotAvailableError(self.method, src_satellite, src_sensor, src_LBA,
tgt_satellite, tgt_sensor, tgt_LBA)
# get path of classifier zip archive
path_classifier_zip = os.path.join(self.classifier_rootDir, '%s_classifiers.zip' % self.method)
if not os.path.isfile(path_classifier_zip):
raise FileNotFoundError("No '%s' classifiers available at %s." % (self.method, path_classifier_zip))
# create an instance of ClusterLearner by reading the requested classifier from the zip archive
with zipfile.ZipFile(path_classifier_zip, "r") as zf, tempfile.TemporaryDirectory() as td:
# read requested classifier from zip archive and create a ClassifierCollection
fName_cls = \
get_filename_classifier_collection(self.method, src_satellite, src_sensor, n_clusters=self.n_clusters)
try:
zf.extract(fName_cls, td)
path_cls = os.path.join(td, fName_cls)
dict_clust_MLinstances = \
ClassifierCollection(path_cls)['__'.join(src_LBA)][tgt_satellite, tgt_sensor]['__'.join(tgt_LBA)]
except KeyError:
raise ClassifierNotAvailableError(self.method, src_satellite, src_sensor, src_LBA,
tgt_satellite, tgt_sensor, tgt_LBA, self.n_clusters)
# validation
expected_MLtype = type(get_machine_learner(self.method))
for label, ml in dict_clust_MLinstances.items():
if not isinstance(ml, expected_MLtype):
raise ValueError("The given dillFile %s contains a spectral cluster (label '%s') with a %s machine "
"learner instead of the expected %s."
% (os.path.basename(fName_cls), label, type(ml), expected_MLtype.__name__,))
# validation
expected_MLtype = type(get_machine_learner(self.method))
for label, ml in dict_clust_MLinstances.items():
if not isinstance(ml, expected_MLtype):
raise ValueError("The given dillFile %s contains a spectral cluster (label '%s') with a %s machine "
"learner instead of the expected %s."
% (os.path.basename(fName_cls), label, type(ml), expected_MLtype.__name__,))
return Cluster_Learner(dict_clust_MLinstances)
return Cluster_Learner(dict_clust_MLinstances)
def classify_image(self, image, cluster_classifier, nodataVal=None, tiledims=(1000, 1000)):
# type: (Union[np.ndarray, GeoArray], Cluster_Learner, Union[int, float], tuple) -> GeoArray
......@@ -1777,10 +1589,9 @@ class RSImage_ClusterPredictor(object):
# assign each input pixel to a cluster (compute classfication with cluster centers as endmembers)
if not self.classif_map:
if self.n_clusters > 1:
from time import time
t0 = time()
t0 = time.time()
self.classif_map = self.classify_image(image, classifier, nodataVal=nodataVal)
print(time() - t0)
print('Total classification time: %s' % time.strftime("%H:%M:%S", time.gmtime(time.time() - t0)))
else:
self.classif_map = np.full((image.rows, image.cols), classifier.cluster_pixVals[0], np.int8)
......@@ -1793,9 +1604,8 @@ class RSImage_ClusterPredictor(object):
# NOTE: prediction is applied in 1000 x 1000 tiles to save memory (because classifier.predict returns float32)
image_predicted = GeoArray(np.empty((image.rows, image.cols, classifier.tgt_n_bands), dtype=image.dtype),
geotransform=image.gt, projection=image.prj, nodata=image.nodata)
from time import time
t0 = time()
print('predicting...')
t0 = time.time()
for ((rS, rE), (cS, cE)), im_tile in image.tiles(tilesize=(1000, 1000)):
print('Predicting tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
......@@ -1805,7 +1615,8 @@ class RSImage_ClusterPredictor(object):
im_tile_pred = classifier.predict(im_tile, classif_map_tile, nodataVal=nodataVal).astype(image.dtype)
image_predicted[rS:rE + 1, cS:cE + 1] = im_tile_pred
print(time()-t0)
print('Total prediction time: %s' % time.strftime("%H:%M:%S", time.gmtime(time.time()-t0)))
# re-apply nodata values to predicted result
if image.nodata is not None:
image_predicted[image.mask_nodata[:] == 0] = image.nodata
......@@ -1813,10 +1624,10 @@ class RSImage_ClusterPredictor(object):
# copy mask_nodata
image_predicted.mask_nodata = image.mask_nodata
GeoArray(image_predicted).save(
'/home/gfz-fe/scheffler/temp/SPECHOM_py/image_predicted_QRclust1_MinDist_noB9.bsq')
GeoArray(self.classif_map).save(
'/home/gfz-fe/scheffler/temp/SPECHOM_py/classif_map_QRclust1_MinDist_noB9.bsq')
# GeoArray(image_predicted).save(
# '/home/gfz-fe/scheffler/temp/SPECHOM_py/image_predicted_QRclust1_MinDist_noB9.bsq')
# GeoArray(self.classif_map).save(
# '/home/gfz-fe/scheffler/temp/SPECHOM_py/classif_map_QRclust1_MinDist_noB9.bsq')
return image_predicted
......@@ -1860,7 +1671,7 @@ class RSImage_ClusterPredictor(object):
# errors[im_predicted == im_predicted.nodata] = im_predicted.nodata
errors[im_predicted.mask_nodata.astype(np.int8) == 0] = im_predicted.nodata
GeoArray(errors).save('/home/gfz-fe/scheffler/temp/SPECHOM_py/errors_LRclust1_MinDist_noB9_clusterpred.bsq')
# GeoArray(errors).save('/home/gfz-fe/scheffler/temp/SPECHOM_py/errors_LRclust1_MinDist_noB9_clusterpred.bsq')
return errors
......@@ -1890,28 +1701,6 @@ class Cluster_Learner(object):
for cluster in self.cluster_pixVals:
yield self.MLdict[cluster]
def predict(self, im_src, cmap, nodataVal=None):
    """Predict the target image by applying the machine learner of each cluster to its assigned pixels.

    :param im_src:      3D source image array (rows x columns x source bands)
    :param cmap:        classification map that assigns each image spectrum to its corresponding cluster
                        -> as used below, it must be a 2D np.ndarray matching the first two dimensions of im_src
    :param nodataVal:   cluster label in cmap to be skipped during prediction
    :return:            3D array (rows x columns x self.tgt_n_bands) in the data type of im_src;
                        pixels that are skipped due to nodataVal are set to 0
    """
    # Zero-initialize the output so that skipped (nodata) pixels hold a defined value
    # instead of uninitialized memory.
    im_pred = np.zeros((im_src.shape[0], im_src.shape[1], self.tgt_n_bands), dtype=im_src.dtype)

    # iterate over all cluster labels and apply corresponding machine learner parameters to predict target spectra
    # NOTE: np.unique already returns the labels in sorted order
    for pixVal in np.unique(cmap):
        if pixVal == nodataVal:
            continue

        classifier = self.MLdict[pixVal]
        mask_pixVal = cmap == pixVal

        # boolean indexing yields a 2D array (pixels x bands) which is what the learner predicts from
        im_pred[mask_pixVal] = classifier.predict(im_src[mask_pixVal]).astype(im_src.dtype)

    return im_pred
def predict(self, im_src, cmap, nodataVal=None):
"""
......
......@@ -4,7 +4,7 @@ Algorithms for multispectral image classification.
"""
import numpy as np
from typing import Union, List
from typing import Union, List # noqa F401 # flake8 issue
from multiprocessing import Pool
from tqdm import tqdm
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment