Commit 1ca747a1 authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Image classifiers MinDist, SAM and SID can now return distance metrics.

parent 4f9923b9
Pipeline #3883 failed with stage
in 2 minutes and 16 seconds
......@@ -77,7 +77,7 @@ class L2B_object(L2A_object):
# perform spectral homogenization of image data #
#################################################
SpH = SpectralHomogenizer(classifier_rootDir=CFG.path_spechomo_classif, logger=self.logger)
SpH = SpectralHomogenizer(classifier_rootDir=CFG.path_spechomo_classif, logger=self.logger, CPUs=CFG.CPUs)
if method == 'LI' or CFG.datasetid_spectral_ref is None:
# linear interpolation (if intended by user or in case of custom spectral characteristics of target sensor)
......
......@@ -10,6 +10,7 @@ from tqdm import tqdm
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MaxAbsScaler
from numba import jit
from geoarray import GeoArray
from py_tools_ds.numeric.array import get_array_tilebounds
......@@ -41,6 +42,7 @@ class _ImageClassifier(object):
self.clf = None # to be implemented by the subclass
self.cmap = None
self.clf_name = ''
self._distance_metrics = None
def _predict(self, tilepos):
raise NotImplementedError('This method has to be implemented by the subclass.')
......@@ -54,33 +56,40 @@ class _ImageClassifier(object):
:param tiledims:
:return:
"""
dtype_cmap = np.array(self.train_labels).dtype
if cmap_nodataVal is not None and not np.can_cast(cmap_nodataVal, dtype_cmap):
dtype_cmap = np.find_common_type(np.array(self.train_labels), np.array([cmap_nodataVal]))
image_cube_gA = GeoArray(image_cube, nodata=in_nodataVal)
image_cube_gA.to_mem()
bounds_alltiles = get_array_tilebounds(image_cube_gA.shape, tiledims)
# use a local variable to avoid pickling in multiprocessing
cmap = GeoArray(np.empty((image_cube_gA.rows, image_cube_gA.cols),
dtype=np.array(self.train_labels).dtype), nodata=cmap_nodataVal)
cmap = GeoArray(np.empty((image_cube_gA.rows, image_cube_gA.cols), dtype=dtype_cmap), nodata=cmap_nodataVal)
dist = np.empty((image_cube_gA.rows, image_cube_gA.cols), dtype=np.float32)
print('Performing %s image classification...' % self.clf_name)
if self.CPUs is None or self.CPUs > 1:
with Pool(self.CPUs, initializer=initializer, initargs=(self.train_spectra, image_cube_gA)) as pool:
tiles_cm = pool.map(self._predict, bounds_alltiles)
for ((rS, rE), (cS, cE)), tile_cm in tiles_cm:
cmap[rS: rE + 1, cS: cE + 1] = tile_cm
tiles_results = pool.map(self._predict, bounds_alltiles)
else:
initializer(self.train_spectra, image_cube_gA)
for (rS, rE), (cS, cE) in tqdm(bounds_alltiles):
# print('Performing classification for tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
cmap[rS: rE + 1, cS: cE + 1] = self._predict(((rS, rE), (cS, cE)))[1]
tiles_results = [self._predict(bounds) for bounds in tqdm(bounds_alltiles)]
for tile_res in tiles_results:
((rS, rE), (cS, cE)), tile_cm = tile_res[:2]
cmap[rS: rE + 1, cS: cE + 1] = tile_cm
if len(tile_res) == 3:
dist[rS: rE + 1, cS: cE + 1] = tile_res[2]
if cmap_nodataVal is not None:
cmap[image_cube_gA.mask_nodata.astype(np.int8) == 0] = cmap_nodataVal
self.cmap = cmap.astype(image_cube.dtype)
self.cmap = cmap
if len(tiles_results[0]) == 3:
self._distance_metrics = dist
return self.cmap
......@@ -98,7 +107,7 @@ class MinimumDistance_Classifier(_ImageClassifier):
# type: (np.ndarray, Union[np.ndarray, List[int]], Union[int, None], dict) -> None
if CPUs is None or CPUs > 1:
CPUs = 1 # The NearestCentroid seens to parallelize automatically. So using multiprocessing is slower.
CPUs = 1 # The NearestCentroid seems to parallelize automatically. So using multiprocessing is slower.
super(MinimumDistance_Classifier, self).__init__(train_spectra, train_labels, CPUs=CPUs)
......@@ -106,6 +115,26 @@ class MinimumDistance_Classifier(_ImageClassifier):
self.clf = NearestCentroid(**kwargs)
self.clf.fit(train_spectra, train_labels)
self.class_centroids = self.clf.centroids_
@property
def euclidian_distance(self):
return self._distance_metrics
@jit
def compute_euclidian_distance_jit(self, imdata, cmap):
# spectra = im2spectra(imdata)
spectra = imdata.reshape((imdata.shape[0] * imdata.shape[1], imdata.shape[2]))
distances = np.empty(np.dot(*imdata.shape[:2]), np.float32)
labels = cmap.flatten()
for lbl in np.unique(cmap):
mask = labels == lbl
centroid = self.class_centroids[list(self.train_labels).index(lbl), :].reshape(1, -1).astype(np.float)
diff = spectra[mask, :] - centroid
distances[mask] = np.sqrt((diff ** 2).sum(axis=1))
return distances.reshape(*imdata.shape[:2])
def _predict(self, tilepos):
assert global_shared_im2classify is not None
......@@ -113,7 +142,10 @@ class MinimumDistance_Classifier(_ImageClassifier):
tileimdata = global_shared_im2classify[rS: rE + 1, cS: cE + 1, :]
spectra = tileimdata.reshape((tileimdata.shape[0] * tileimdata.shape[1], tileimdata.shape[2]))
return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
cmap = self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
dist = self.compute_euclidian_distance_jit(tileimdata.astype(np.float32), cmap)
return tilepos, cmap, dist
class kNN_Classifier(_ImageClassifier):
......@@ -132,7 +164,7 @@ class kNN_Classifier(_ImageClassifier):
tileimdata = global_shared_im2classify[rS: rE + 1, cS: cE + 1, :]
spectra = tileimdata.reshape((tileimdata.shape[0] * tileimdata.shape[1], tileimdata.shape[2]))
return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2]), None
class SAM_Classifier(_ImageClassifier):
......@@ -142,6 +174,14 @@ class SAM_Classifier(_ImageClassifier):
self.clf_name = 'spectral angle mapper (SAM)'
@property
def angles_rad(self):
return self._distance_metrics
@property
def angles_deg(self):
return np.rad2deg(self._distance_metrics) if self._distance_metrics is not None else None
def _predict(self, tilepos):
assert global_shared_endmembers is not None and global_shared_im2classify is not None
......@@ -159,15 +199,17 @@ class SAM_Classifier(_ImageClassifier):
angles = np.zeros((tileimdata.shape[0], tileimdata.shape[1], self.n_samples), np.float)
# if np.std(tileimdata) == 0: # skip tiles that only contain the same value
# loop over all training spectra and compute spectral angle for each pixel
for n_sample in range(self.n_samples):
train_spectrum = train_spectra_norm[n_sample, :].reshape(1, 1, self.n_features)
angles[:, :, n_sample] = self._calc_sam(tileimdata_norm,
train_spectrum,
axis=2)
angles_min = np.min(angles, axis=2).astype(np.float32)
cmap = np.argmin(angles, axis=2).astype(np.int16)
return tilepos, cmap
return tilepos, cmap, angles_min
@staticmethod
def _calc_sam(s1_norm, s2_norm, axis=0):
......@@ -193,6 +235,10 @@ class SID_Classifier(_ImageClassifier):
self.clf_name = 'spectral information divergence (SID)'
@property
def sid(self):
return self._distance_metrics
def _predict(self, tilepos):
assert global_shared_endmembers is not None and global_shared_im2classify is not None
......@@ -210,15 +256,17 @@ class SID_Classifier(_ImageClassifier):
sid = np.zeros((tileimdata.shape[0], tileimdata.shape[1], self.n_samples), np.float)
# if np.std(tileimdata) == 0: # skip tiles that only contain the same value
# loop over all training spectra and compute spectral information divergence for each pixel
for n_sample in range(self.n_samples):
train_spectrum = train_spectra_norm[n_sample, :].reshape(1, 1, self.n_features)
sid[:, :, n_sample] = self._calc_sid(tileimdata_norm,
train_spectrum,
axis=2)
sid_min = np.min(sid, axis=2).astype(np.float32)
cmap = np.argmin(sid, axis=2).astype(np.int16)
return tilepos, cmap
return tilepos, cmap, sid_min
@staticmethod
def _calc_sid(s1_norm, s2_norm, axis=0):
......@@ -259,12 +307,12 @@ class RF_Classifier(_ImageClassifier):
tileimdata = global_shared_im2classify[rS: rE + 1, cS: cE + 1, :]
spectra = tileimdata.reshape((tileimdata.shape[0] * tileimdata.shape[1], tileimdata.shape[2]))
return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2]), None
def classify_image(image, train_spectra, train_labels, classif_alg, in_nodataVal=None, cmap_nodataVal=None,
tiledims=(1000, 1000), CPUs=None, **kwargs):
# type: (Union[np.ndarray, GeoArray], np.ndarray, Union[np.ndarray, List[int]], str, int, ...) -> GeoArray
tiledims=(1000, 1000), CPUs=None, return_distance=False, **kwargs):
# type: (Union[np.ndarray, GeoArray], np.ndarray, Union[np.ndarray, List[int]], str, int, int, tuple, int, bool, dict) -> Union[GeoArray, Tuple[GeoArray, np.ndarray]] # noqa E501
"""Classify image to find the cluster each spectrum belongs to.
:param image: image to be classified
......@@ -281,6 +329,7 @@ def classify_image(image, train_spectra, train_labels, classif_alg, in_nodataVal
:param cmap_nodataVal:
:param tiledims:
:param CPUs: number of CPUs to be used for classification
:param return_distance: whether to return the distance metrics leading to the returned classification map
:param kwargs: keyword arguments to be passed to classifiers if possible
"""
if classif_alg == 'kNN':
......@@ -318,7 +367,19 @@ def classify_image(image, train_spectra, train_labels, classif_alg, in_nodataVal
cmap = clf.classify(image, in_nodataVal=in_nodataVal, cmap_nodataVal=cmap_nodataVal, tiledims=tiledims)
return cmap
if not return_distance:
return cmap
else:
if classif_alg == 'MinDist':
dist = clf.euclidian_distance
elif classif_alg == 'SAM':
dist = clf.angles_deg
elif classif_alg == 'SID':
dist = clf.sid
else:
dist = None
return cmap, dist
def normalize_endmembers_image(endmembers, image):
......
......@@ -42,12 +42,12 @@ class Test_MinimumDistance_Classifier(unittest.TestCase):
def test_classify(self):
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1)
cmap_sp = MDC.classify(test_gA, in_nodataVal=-9999)
self.assertIsInstance(cmap_sp, np.ndarray)
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=None)
cmap_mp = MDC.classify(test_gA, in_nodataVal=-9999)
self.assertIsInstance(cmap_mp, np.ndarray)
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
......@@ -57,12 +57,12 @@ class Test_kNN_Classifier(unittest.TestCase):
def test_classify(self):
kNNC = kNN_Classifier(cluster_centers, cluster_labels, CPUs=1)
cmap_sp = kNNC.classify(test_gA, in_nodataVal=-9999)
self.assertIsInstance(cmap_sp, np.ndarray)
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
kNNC = kNN_Classifier(cluster_centers, cluster_labels, CPUs=None)
cmap_mp = kNNC.classify(test_gA, in_nodataVal=-9999)
self.assertIsInstance(cmap_mp, np.ndarray)
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
......@@ -72,12 +72,12 @@ class Test_SAM_Classifier(unittest.TestCase):
def test_classify(self):
SC = SAM_Classifier(cluster_centers, CPUs=1)
cmap_sp = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_sp, np.ndarray)
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
SC = SAM_Classifier(cluster_centers, CPUs=None)
cmap_mp = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_mp, np.ndarray)
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
......@@ -87,12 +87,12 @@ class Test_SID_Classifier(unittest.TestCase):
def test_classify(self):
SC = SID_Classifier(cluster_centers, CPUs=1)
cmap_sp = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_sp, np.ndarray)
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
SC = SID_Classifier(cluster_centers, CPUs=None)
cmap_mp = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_mp, np.ndarray)
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
......@@ -102,12 +102,12 @@ class Test_RF_Classifier(unittest.TestCase):
def test_classify(self):
RFC = RF_Classifier(cluster_centers, cluster_labels, CPUs=1, **dict(random_state=0))
cmap_sp = RFC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_sp, np.ndarray)
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
RFC = RF_Classifier(cluster_centers, cluster_labels, CPUs=None, **dict(random_state=0))
cmap_mp = RFC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_mp, np.ndarray)
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment