Commit 6d8ed7ea authored by Daniel Scheffler
Browse files

Classification algorithms now ignore pixels with nodata in the input image....


Classification algorithms now ignore pixels with nodata in the input image. Image classification algorithms are now much faster. Improved show_cmap(), added _show_distance_metrics() and subclass methods. Fixed bug related to initialization value of Euclidean distances.
Signed-off-by: Daniel Scheffler <danschef@gfz-potsdam.de>
parent b276be50
Pipeline #3889 failed with stage
in 2 minutes and 3 seconds
......@@ -64,7 +64,7 @@ class _ImageClassifier(object):
dtype_cmap = np.find_common_type(np.array(self.train_labels), np.array([cmap_nodataVal]))
image_cube_gA = GeoArray(image_cube, nodata=in_nodataVal)
image_cube_gA.to_mem()
# image_cube_gA.to_mem()
bounds_alltiles = get_array_tilebounds(image_cube_gA.shape, tiledims)
......@@ -88,15 +88,29 @@ class _ImageClassifier(object):
if len(tile_res) == 3:
dist[rS: rE + 1, cS: cE + 1] = tile_res[2]
if cmap_nodataVal is not None:
cmap[image_cube_gA.mask_nodata.astype(np.int8) == 0] = cmap_nodataVal
self.cmap = cmap
if len(tiles_results[0]) == 3:
self._distance_metrics = dist
return self.cmap
@staticmethod
def overwrite_cmap_at_nodata_positions(cmap, imdata, cmap_nodataVal, im_nodataVal):
    """Overwrite the classification map at all positions with nodata in ANY band.

    NOTE: A pixel that holds nodata in only some (not all) bands would otherwise be assigned a wrong
          output class, so a pixel is flagged as soon as a single band equals the nodata value.

    :param cmap:            2D classification map (rows x columns); it is modified in place
    :param imdata:          3D image data (rows x columns x bands) belonging to the classification map
    :param cmap_nodataVal:  value written into the classification map at the nodata positions
    :param im_nodataVal:    nodata value of the image data (NaN is supported)
    :return:                the classification map that was passed in, with nodata positions overwritten
    """
    # NaN never compares equal to anything (not even to itself), so an equality test would silently
    # miss every nodata pixel of an image that uses NaN as its nodata value
    if isinstance(im_nodataVal, (float, np.floating)) and np.isnan(im_nodataVal):
        is_nodata = np.isnan(imdata)
    else:
        is_nodata = imdata == im_nodataVal

    # collapse the band axis: True wherever at least one band holds nodata
    mask_anynodata = np.any(is_nodata, axis=2)
    cmap[mask_anynodata] = cmap_nodataVal

    return cmap
@staticmethod
def _label_unclassified_pixels(cmap, label_unclassified, threshold, distances):
# type: (GeoArray, int, Union[str, int, float], np.ndarray) -> GeoArray
......@@ -109,7 +123,11 @@ class _ImageClassifier(object):
pass
elif isinstance(threshold, str) and threshold.endswith('%'):
percent = float(threshold.split('%')[0].strip())
# only include distances where the classification map is not nodata
# at nodata positions, the distances may have the initialization value 1e6 (MinDist)
dists = distances[cmap[:] != cmap_nodataVal] if cmap_nodataVal is not None else distances
threshold = np.nanpercentile(dists, 100 - percent)
else:
raise ValueError(threshold)
......@@ -123,9 +141,21 @@ class _ImageClassifier(object):
return cmap
def show_cmap(self):
    """Show the classification map by calling its show() method.

    Does nothing while self.cmap is None.
    """
    # compare against None explicitly: truth-testing an array-like map with more than one element
    # raises a ValueError ("truth value of an array ... is ambiguous")
    if self.cmap is not None:
        self.cmap.show()
def show_cmap(self, **kwargs):
    """Show the classification map, using the 'Spectral' colormap unless another one is given.

    :param kwargs:  keyword arguments passed on to the show() method of self.cmap;
                    'cmap' overrides the default colormap
    """
    if self.cmap is None:
        # no classification map available, nothing to show
        return

    colormap = kwargs.pop('cmap', 'Spectral')
    self.cmap.show(cmap=colormap, **kwargs)
def _show_distance_metrics(self, **kwargs):
# Show the stored distance metrics as an image; does nothing while self._distance_metrics is None.
# kwargs are passed on to the show() call; 'cmap' defaults to 'Spectral_r' and 'vmin' to 0.
if self._distance_metrics is not None:
# NOTE(review): if GeoArray wraps the array without copying, the assignment below also
#               modifies self._distance_metrics - confirm
dists = GeoArray(self._distance_metrics)
if self._cmap_nodataVal is not None:
# flag the distances at all positions where the classification map holds its nodata value
dists[self.cmap[:] == self._cmap_nodataVal] = -9999
# declare -9999 as the nodata value of the distance image
dists.nodata = -9999
# the defaults are popped from kwargs so that 'cmap' and 'vmin' are not passed twice
dists.show(cmap=kwargs.pop('cmap', 'Spectral_r'),
vmin=kwargs.pop('vmin', 0),
**kwargs)
@staticmethod
def _show_distances_histogram(distances, cmap, figsize=(10, 5), bins=100, normed=False):
......@@ -166,7 +196,7 @@ class MinimumDistance_Classifier(_ImageClassifier):
def compute_euclidian_distance(self, imdata, cmap, nodataVal_cmap):
spectra = im2spectra(imdata)
distances = np.empty(np.dot(*imdata.shape[:2]), np.float32)
distances = np.full(np.dot(*imdata.shape[:2]), 1e6, np.float32)
labels = cmap.flatten()
for lbl in np.unique(cmap):
......@@ -186,6 +216,10 @@ class MinimumDistance_Classifier(_ImageClassifier):
spectra = tileimdata.reshape((tileimdata.shape[0] * tileimdata.shape[1], tileimdata.shape[2]))
cmap = self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
if global_shared_im2classify.nodata is not None and self._cmap_nodataVal is not None:
cmap = self.overwrite_cmap_at_nodata_positions(cmap, tileimdata,
self._cmap_nodataVal, global_shared_im2classify.nodata)
dist = self.compute_euclidian_distance(tileimdata.astype(np.float32), cmap, self._cmap_nodataVal)
return tilepos, cmap, dist
......@@ -199,6 +233,9 @@ class MinimumDistance_Classifier(_ImageClassifier):
def show_distances_histogram(self, figsize=(10, 5), bins=100, normed=False):
# Pass self.euclidian_distance and self.cmap to self._show_distances_histogram(),
# forwarding figsize, bins and normed unchanged.
self._show_distances_histogram(self.euclidian_distance, self.cmap, figsize=figsize, bins=bins, normed=normed)
def show_distances(self, **kwargs):
# Public entry point of this classifier for showing its distance metrics:
# forwards all keyword arguments to self._show_distance_metrics().
self._show_distance_metrics(**kwargs)
class kNN_Classifier(_ImageClassifier):
def __init__(self, train_spectra, train_labels, CPUs=1, **kwargs):
......@@ -261,6 +298,10 @@ class SAM_Classifier(_ImageClassifier):
angles_min = np.min(angles, axis=2).astype(np.float32)
cmap = np.argmin(angles, axis=2).astype(np.int16)
if global_shared_im2classify.nodata is not None and self._cmap_nodataVal is not None:
cmap = self.overwrite_cmap_at_nodata_positions(cmap, tileimdata,
self._cmap_nodataVal, global_shared_im2classify.nodata)
return tilepos, cmap, angles_min
@staticmethod
......@@ -288,6 +329,9 @@ class SAM_Classifier(_ImageClassifier):
def show_angles_histogram(self, figsize=(10, 5), bins=100, normed=False):
# Pass self.angles_deg and self.cmap to self._show_distances_histogram(),
# forwarding figsize, bins and normed unchanged.
self._show_distances_histogram(self.angles_deg, self.cmap, figsize=figsize, bins=bins, normed=normed)
def show_angles(self, **kwargs):
# Public entry point of this classifier for showing its distance metrics:
# forwards all keyword arguments to self._show_distance_metrics().
self._show_distance_metrics(**kwargs)
class SID_Classifier(_ImageClassifier):
def __init__(self, train_spectra, CPUs=1):
......@@ -327,6 +371,10 @@ class SID_Classifier(_ImageClassifier):
sid_min = np.min(sid, axis=2).astype(np.float32)
cmap = np.argmin(sid, axis=2).astype(np.int16)
if global_shared_im2classify.nodata is not None and self._cmap_nodataVal is not None:
cmap = self.overwrite_cmap_at_nodata_positions(cmap, tileimdata,
self._cmap_nodataVal, global_shared_im2classify.nodata)
return tilepos, cmap, sid_min
@staticmethod
......@@ -377,7 +425,13 @@ class RF_Classifier(_ImageClassifier):
tileimdata = global_shared_im2classify[rS: rE + 1, cS: cE + 1, :]
spectra = tileimdata.reshape((tileimdata.shape[0] * tileimdata.shape[1], tileimdata.shape[2]))
return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2]), None
cmap = self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
if global_shared_im2classify.nodata is not None and self._cmap_nodataVal is not None:
cmap = self.overwrite_cmap_at_nodata_positions(cmap, tileimdata,
self._cmap_nodataVal, global_shared_im2classify.nodata)
return tilepos, cmap, None
def classify_image(image, train_spectra, train_labels, classif_alg, in_nodataVal=None, cmap_nodataVal=None,
......
......@@ -26,6 +26,8 @@ cfg = set_config(job_ID=26186196, db_host=db_host, reset_status=True, is_test=Tr
# location of the zipped classifiers and name of the classifier file used by the tests
path_classifier_zip = os.path.join(cfg.path_spechomo_classif, 'LR_classifiers.zip')
fName_cls = 'LR_clust50__Landsat-7__ETM+.dill'
# random int16 test image with 1010 x 1010 pixels and 6 bands (values from 0 to 9999)
# NOTE(review): the inline comment says Landsat-5 while fName_cls refers to Landsat-7 ETM+ - confirm
test_gA = GeoArray(np.random.randint(0, 10000, (1010, 1010, 6), np.int16)) # 6 Landsat-5 bands
# first 5 rows of column 0: nodata (-9999) in all bands
test_gA[:5, 0, :] = -9999
# first 5 rows of column 1: nodata (-9999) in band index 3 only, i.e., in a single band
test_gA[:5, 1, 3] = -9999
# get cluster centers
with zipfile.ZipFile(path_classifier_zip, "r") as zf, tempfile.TemporaryDirectory() as td:
......@@ -41,17 +43,27 @@ with zipfile.ZipFile(path_classifier_zip, "r") as zf, tempfile.TemporaryDirector
class Test_MinimumDistance_Classifier(unittest.TestCase):
def test_classify(self):
# Classify the test image once with CPUs=1 and once with CPUs=None and check that both runs return
# a GeoArray of shape (1010, 1010) with identical content.
# NOTE(review): 'sp'/'mp' presumably stand for single-/multiprocessing - confirm what CPUs=None selects
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1)
cmap_sp = MDC.classify(test_gA, in_nodataVal=-9999)
# the call with cmap_nodataVal overwrites the result of the call above
cmap_sp = MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999)
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=None)
cmap_mp = MDC.classify(test_gA, in_nodataVal=-9999)
# the call with cmap_nodataVal overwrites the result of the call above
cmap_mp = MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999)
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
# both runs must yield the same classification map
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
def test_label_unclassified_pixels_absolute_th(self):
# Classify the test image in tiles of (400, 200), then label unclassified pixels with -1
# using the absolute threshold 6000. The test only checks that no exception is raised.
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1)
MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
MDC.label_unclassified_pixels(label_unclassified=-1, threshold=6000)
def test_label_unclassified_pixels_relative_th(self):
# Classify the test image in tiles of (400, 200), then label unclassified pixels with -1
# using the relative (percentage string) threshold '10%'. The test only checks that no exception is raised.
MDC = MinimumDistance_Classifier(cluster_centers, cluster_labels, CPUs=1)
MDC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
MDC.label_unclassified_pixels(label_unclassified=-1, threshold='10%')
class Test_kNN_Classifier(unittest.TestCase):
def test_classify(self):
......@@ -71,22 +83,25 @@ class Test_kNN_Classifier(unittest.TestCase):
class Test_SAM_Classifier(unittest.TestCase):
def test_classify(self):
# Classify the test image in tiles of (400, 200), once with CPUs=1 and once with CPUs=None, and check
# that both runs return a GeoArray of shape (1010, 1010) with identical content.
# NOTE(review): 'sp'/'mp' presumably stand for single-/multiprocessing - confirm what CPUs=None selects
SC = SAM_Classifier(cluster_centers, CPUs=1)
cmap_sp = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
# the call with cmap_nodataVal overwrites the result of the call above
cmap_sp = SC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_sp, GeoArray)
self.assertEqual(cmap_sp.shape, (1010, 1010))
SC = SAM_Classifier(cluster_centers, CPUs=None)
cmap_mp = SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
# the call with cmap_nodataVal overwrites the result of the call above
cmap_mp = SC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
self.assertIsInstance(cmap_mp, GeoArray)
self.assertEqual(cmap_mp.shape, (1010, 1010))
# both runs must yield the same classification map
self.assertTrue(np.array_equal(cmap_sp, cmap_mp))
def test_label_unclassified_pixels(self):
def test_label_unclassified_pixels_absolute_th(self):
# Classify the test image in tiles of (400, 200), then label unclassified pixels with -1
# using the absolute threshold 10. The test only checks that no exception is raised.
SC = SAM_Classifier(cluster_centers, CPUs=None)
SC.classify(test_gA, in_nodataVal=-9999, tiledims=(400, 200))
SC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
SC.label_unclassified_pixels(label_unclassified=-1, threshold=10)
def test_label_unclassified_pixels_relative_th(self):
# Classify the test image in tiles of (400, 200), then label unclassified pixels with -1
# using the relative (percentage string) threshold '10%'. The test only checks that no exception is raised.
SC = SAM_Classifier(cluster_centers, CPUs=None)
SC.classify(test_gA, in_nodataVal=-9999, cmap_nodataVal=-9999, tiledims=(400, 200))
SC.label_unclassified_pixels(label_unclassified=-1, threshold='10%')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment