Commit 0ddae00e authored by Daniel Scheffler
Browse files

Moved image classifiers from L2B_P to new module 'classification'.

parent 2ce6489d
......@@ -29,8 +29,6 @@ from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline, Pipeline # noqa F401 # flake8 issue
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from pysptools.classification import SAM
from geoarray import GeoArray # noqa F401 # flake8 issue
from ..options.config import GMS_config as CFG
......@@ -42,6 +40,7 @@ from ..misc.logging import close_logger
from ..model.metadata import get_LayerBandsAssignment
from .L2A_P import L2A_object
from ..model.gms_object import GMS_identifier
from .classification import MinimumDistance_Classifier, kNN_Classifier, SAM_Classifier
__author__ = 'Daniel Scheffler'
......@@ -1648,114 +1647,12 @@ class RSImage_Predictor(object):
return errors
class ImageClassifier(object):
    """Base class for GMS image classifiers.

    Subclasses are expected to set ``self.clf`` and to implement ``_predict()``. ``classify()`` then runs the
    prediction tile by tile and assembles the results into ``self.cmap``.
    """

    def __init__(self, train_spectra, train_labels, CPUs=1):
        # type: (np.ndarray, Union[np.ndarray, List[int]], int) -> None
        """Store the training data and the parallelisation setting.

        :param train_spectra:   training spectra; axis 0 is read as samples, axis 1 as features
        :param train_labels:    labels belonging to the training spectra (their dtype defines the dtype of the
                                classification map buffer)
        :param CPUs:            number of CPUs to use; None or a value > 1 makes classify() use a process pool
        """
        self.CPUs = CPUs
        self.train_spectra = train_spectra
        self.train_labels = train_labels
        self.n_samples = train_spectra.shape[0]
        self.n_features = train_spectra.shape[1]
        self.clf = None  # to be implemented by the subclass
        self.cmap = None

    def _predict(self, tilepos, tileimdata):
        """Classify a single tile and return a tuple (tilepos, classification map of the tile).

        :param tilepos:     tile position as ((row_start, row_end), (col_start, col_end))
        :param tileimdata:  image data of the tile
        """
        raise NotImplementedError('This method has to be implemented by the subclass.')

    def classify(self, image_cube, nodataVal=None, tiledims=(1000, 1000)):
        """Classify the given image tile-wise and return the classification map.

        :param image_cube:  image data to be classified (anything accepted by the GeoArray constructor that
                            also provides a 'dtype' attribute)
        :param nodataVal:   no-data value of the image; if given, no-data pixels are set to it in the result
        :param tiledims:    tile dimensions handed over to GeoArray.tiles()
        :return:            classification map, cast to the dtype of image_cube
        """
        image_cube_gA = GeoArray(image_cube, nodata=nodataVal)

        self.cmap = GeoArray(np.empty((image_cube_gA.rows, image_cube_gA.cols),
                                      dtype=np.array(self.train_labels).dtype), nodata=nodataVal)

        if self.CPUs is None or self.CPUs > 1:
            with Pool(self.CPUs) as pool:
                tiles_cm = pool.starmap(self._predict, image_cube_gA.tiles(tiledims))

            # tile end indices are inclusive, hence the '+ 1' in the slices
            for ((rS, rE), (cS, cE)), tile_cm in tiles_cm:
                self.cmap[rS: rE + 1, cS: cE + 1] = tile_cm

        else:
            for ((rS, rE), (cS, cE)), tile in tqdm(image_cube_gA.tiles(tiledims)):
                print('Performing classification for tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
                self.cmap[rS: rE + 1, cS: cE + 1] = self._predict(((rS, rE), (cS, cE)), tile)[1]

        if nodataVal is not None:
            self.cmap[image_cube_gA.mask_nodata.astype(np.int8) == 0] = nodataVal

        return self.cmap.astype(image_cube.dtype)

    def show_cmap(self):
        """Display the classification map if classify() has already produced one."""
        # explicit None check: the truth value of an array-like map is ambiguous or depends on its content
        if self.cmap is not None:
            self.cmap.show()
class MinimumDistance_Classifier(ImageClassifier):
    """Classifier assigning each pixel vector to the cluster whose mean vector has the smallest euclidean distance.

    NOTE: distance equation: D = sqrt(sum((Xvi - Xvj)²))
    """

    def __init__(self, train_spectra, train_labels, CPUs=1):
        # type: (np.ndarray, Union[np.ndarray, List[int]], int) -> None
        """Fit a scikit-learn NearestCentroid model to the given training data.

        :param train_spectra:   training spectra (samples x features)
        :param train_labels:    labels belonging to the training spectra
        :param CPUs:            number of CPUs to use for the tile-wise classification
        """
        super(MinimumDistance_Classifier, self).__init__(train_spectra, train_labels, CPUs=CPUs)

        centroid_model = NearestCentroid()
        self.clf = centroid_model
        centroid_model.fit(train_spectra, train_labels)

    def _predict(self, tilepos, tileimdata):
        """Return (tilepos, label map) for the given tile."""
        pixel_spectra = im2spectra(tileimdata)
        labels = self.clf.predict(pixel_spectra)
        tile_shape = tileimdata.shape[:2]

        return tilepos, labels.reshape(*tile_shape)
class kNN_Classifier(ImageClassifier):
    """Classifier labelling each pixel vector with scikit-learn's KNeighborsClassifier."""

    def __init__(self, train_spectra, train_labels, CPUs=1, n_neighbors=10):
        # type: (np.ndarray, Union[np.ndarray, List[int]], int, int) -> None
        """Fit a k-nearest-neighbours model to the given training data.

        :param train_spectra:   training spectra (samples x features)
        :param train_labels:    labels belonging to the training spectra
        :param CPUs:            number of CPUs; used for the tile-wise classification and as 'n_jobs' of the
                                scikit-learn classifier
        :param n_neighbors:     number of neighbours handed over to KNeighborsClassifier
        """
        super(kNN_Classifier, self).__init__(train_spectra, train_labels, CPUs=CPUs)

        self.clf = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=CPUs)
        self.clf.fit(train_spectra, train_labels)

    def _predict(self, tilepos, tileimdata):
        """Return (tilepos, label map) for the given tile."""
        spectra = im2spectra(tileimdata)

        return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
class SAM_Classifier(ImageClassifier):
    """Spectral angle mapper classifier wrapping pysptools.classification.SAM."""

    def __init__(self, train_spectra, threshold=0.1, CPUs=1):
        # type: (np.ndarray, float, int) -> None
        """Get an instance of SAM_Classifier.

        :param train_spectra:   reference spectra, one per row; the row indices serve as training labels
        :param threshold:       threshold handed over to SAM.classify()
        :param CPUs:            number of CPUs to use for the tile-wise classification
        """
        super(SAM_Classifier, self).__init__(train_spectra, np.arange(train_spectra.shape[0]), CPUs=CPUs)

        self.clf = SAM()
        self.threshold = threshold

    def _predict(self, tilepos, tileimdata):
        """Return (tilepos, SAM classification map) for the given tile.

        The tile position must be part of the return value because the classify() method of the base class
        unpacks/indexes the result as a (position, map) pair.
        """
        return tilepos, self.clf.classify(tileimdata, self.train_spectra, self.threshold)

    def classify(self, image_cube, nodataVal=None, tiledims=(1000, 1000), mask=None):
        """Classify the given image tile-wise and return the classification map.

        :param image_cube:  image data to be classified
        :param nodataVal:   no-data value of the image
        :param tiledims:    tile dimensions handed over to GeoArray.tiles()
        :param mask:        optional index/boolean array of pixels to be excluded; these pixels are set
                            to -9999 in the returned map
        :return:            classification map
        """
        image_cube_gA = GeoArray(image_cube, nodata=nodataVal)

        # avoid "RuntimeWarning: invalid value encountered in less" during SAM.classify()
        fill_value = np.max(image_cube_gA)

        # 'mask is not None' instead of 'if mask': the truth value of a multi-element array is ambiguous
        if mask is not None:
            image_cube_gA[mask] = fill_value
        elif nodataVal is not None:
            image_cube_gA[image_cube_gA[:] == nodataVal] = fill_value
        else:
            image_cube_gA[image_cube_gA.mask_nodata.astype(np.int8) == 0] = fill_value

        cmap = super(SAM_Classifier, self).classify(image_cube_gA, nodataVal=nodataVal, tiledims=tiledims)

        if mask is not None:
            cmap[mask] = -9999

        return cmap
class RSImage_ClusterPredictor(object):
"""Predictor class applying the predict() function of a machine learning classifier described by the given args."""
def __init__(self, method='LR', n_clusters=50, classif_alg='MinDist', kNN_n_neighbors=10, classifier_rootDir='',
CPUs=1):
# type: (str, int, str, int, str, int) -> None
"""Get an instance of RSImage_Predictor.
"""Get an instance of RSImage_ClusterPredictor.
:param method: machine learning approach to be used for spectral bands prediction
'LR': Linear Regression
......
# -*- coding: utf-8 -*-
"""
Algorithms for multispectral image classification.
"""
import numpy as np
from typing import Union, List
from multiprocessing import Pool
from tqdm import tqdm
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from pysptools.classification import SAM
from geoarray import GeoArray
class _ImageClassifier(object):
    """Base class for GMS image classifiers.

    Subclasses are expected to set ``self.clf`` and to implement ``_predict()``. ``classify()`` then runs the
    prediction tile by tile and assembles the results into ``self.cmap``.
    """

    def __init__(self, train_spectra, train_labels, CPUs=1):
        # type: (np.ndarray, Union[np.ndarray, List[int]], int) -> None
        """Store the training data and the parallelisation setting.

        :param train_spectra:   training spectra; axis 0 is read as samples, axis 1 as features
        :param train_labels:    labels belonging to the training spectra (their dtype defines the dtype of the
                                classification map buffer)
        :param CPUs:            number of CPUs to use; None or a value > 1 makes classify() use a process pool
        """
        self.CPUs = CPUs
        self.train_spectra = train_spectra
        self.train_labels = train_labels
        self.n_samples = train_spectra.shape[0]
        self.n_features = train_spectra.shape[1]
        self.clf = None  # to be implemented by the subclass
        self.cmap = None

    def _predict(self, tilepos, tileimdata):
        """Classify a single tile and return a tuple (tilepos, classification map of the tile).

        :param tilepos:     tile position as ((row_start, row_end), (col_start, col_end))
        :param tileimdata:  image data of the tile
        """
        raise NotImplementedError('This method has to be implemented by the subclass.')

    def classify(self, image_cube, nodataVal=None, tiledims=(1000, 1000)):
        """Classify the given image tile-wise and return the classification map.

        :param image_cube:  image data to be classified (anything accepted by the GeoArray constructor that
                            also provides a 'dtype' attribute)
        :param nodataVal:   no-data value of the image; if given, no-data pixels are set to it in the result
        :param tiledims:    tile dimensions handed over to GeoArray.tiles()
        :return:            classification map, cast to the dtype of image_cube
        """
        image_cube_gA = GeoArray(image_cube, nodata=nodataVal)

        self.cmap = GeoArray(np.empty((image_cube_gA.rows, image_cube_gA.cols),
                                      dtype=np.array(self.train_labels).dtype), nodata=nodataVal)

        if self.CPUs is None or self.CPUs > 1:
            with Pool(self.CPUs) as pool:
                tiles_cm = pool.starmap(self._predict, image_cube_gA.tiles(tiledims))

            # tile end indices are inclusive, hence the '+ 1' in the slices
            for ((rS, rE), (cS, cE)), tile_cm in tiles_cm:
                self.cmap[rS: rE + 1, cS: cE + 1] = tile_cm

        else:
            for ((rS, rE), (cS, cE)), tile in tqdm(image_cube_gA.tiles(tiledims)):
                print('Performing classification for tile ((%s, %s), (%s, %s))...' % (rS, rE, cS, cE))
                self.cmap[rS: rE + 1, cS: cE + 1] = self._predict(((rS, rE), (cS, cE)), tile)[1]

        if nodataVal is not None:
            self.cmap[image_cube_gA.mask_nodata.astype(np.int8) == 0] = nodataVal

        return self.cmap.astype(image_cube.dtype)

    def show_cmap(self):
        """Display the classification map if classify() has already produced one."""
        # explicit None check: the truth value of an array-like map is ambiguous or depends on its content
        if self.cmap is not None:
            self.cmap.show()
class MinimumDistance_Classifier(_ImageClassifier):
    """Classifier assigning each pixel vector to the cluster whose mean vector has the smallest euclidean distance.

    NOTE: distance equation: D = sqrt(sum((Xvi - Xvj)²))
    """

    def __init__(self, train_spectra, train_labels, CPUs=1):
        # type: (np.ndarray, Union[np.ndarray, List[int]], int) -> None
        """Fit a scikit-learn NearestCentroid model to the given training data.

        :param train_spectra:   training spectra (samples x features)
        :param train_labels:    labels belonging to the training spectra
        :param CPUs:            number of CPUs to use for the tile-wise classification
        """
        super(MinimumDistance_Classifier, self).__init__(train_spectra, train_labels, CPUs=CPUs)

        centroid_model = NearestCentroid()
        self.clf = centroid_model
        centroid_model.fit(train_spectra, train_labels)

    def _predict(self, tilepos, tileimdata):
        """Return (tilepos, label map) for the given tile."""
        dims = tileimdata.shape
        # flatten the two spatial axes so that every row is one pixel spectrum
        pixel_spectra = tileimdata.reshape((dims[0] * dims[1], dims[2]))
        labels = self.clf.predict(pixel_spectra)
        tile_shape = tileimdata.shape[:2]

        return tilepos, labels.reshape(*tile_shape)
class kNN_Classifier(_ImageClassifier):
    """Classifier labelling each pixel vector with scikit-learn's KNeighborsClassifier."""

    def __init__(self, train_spectra, train_labels, CPUs=1, n_neighbors=10):
        # type: (np.ndarray, Union[np.ndarray, List[int]], int, int) -> None
        """Fit a k-nearest-neighbours model to the given training data.

        :param train_spectra:   training spectra (samples x features)
        :param train_labels:    labels belonging to the training spectra
        :param CPUs:            number of CPUs; used for the tile-wise classification and as 'n_jobs' of the
                                scikit-learn classifier
        :param n_neighbors:     number of neighbours handed over to KNeighborsClassifier
        """
        super(kNN_Classifier, self).__init__(train_spectra, train_labels, CPUs=CPUs)

        self.clf = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=CPUs)
        self.clf.fit(train_spectra, train_labels)

    def _predict(self, tilepos, tileimdata):
        """Return (tilepos, label map) for the given tile."""
        # flatten the two spatial axes so that every row is one pixel spectrum
        spectra = tileimdata.reshape((tileimdata.shape[0] * tileimdata.shape[1], tileimdata.shape[2]))

        return tilepos, self.clf.predict(spectra).reshape(*tileimdata.shape[:2])
class SAM_Classifier(_ImageClassifier):
    """Spectral angle mapper classifier wrapping pysptools.classification.SAM."""

    def __init__(self, train_spectra, threshold=0.1, CPUs=1):
        # type: (np.ndarray, float, int) -> None
        """Get an instance of SAM_Classifier.

        :param train_spectra:   reference spectra, one per row; the row indices serve as training labels
        :param threshold:       threshold handed over to SAM.classify()
        :param CPUs:            number of CPUs to use for the tile-wise classification
        """
        super(SAM_Classifier, self).__init__(train_spectra, np.arange(train_spectra.shape[0]), CPUs=CPUs)

        self.clf = SAM()
        self.threshold = threshold

    def _predict(self, tilepos, tileimdata):
        """Return (tilepos, SAM classification map) for the given tile.

        The tile position must be part of the return value because the classify() method of the base class
        unpacks/indexes the result as a (position, map) pair.
        """
        return tilepos, self.clf.classify(tileimdata, self.train_spectra, self.threshold)

    def classify(self, image_cube, nodataVal=None, tiledims=(1000, 1000), mask=None):
        """Classify the given image tile-wise and return the classification map.

        :param image_cube:  image data to be classified
        :param nodataVal:   no-data value of the image
        :param tiledims:    tile dimensions handed over to GeoArray.tiles()
        :param mask:        optional index/boolean array of pixels to be excluded; these pixels are set
                            to -9999 in the returned map
        :return:            classification map
        """
        image_cube_gA = GeoArray(image_cube, nodata=nodataVal)

        # avoid "RuntimeWarning: invalid value encountered in less" during SAM.classify()
        fill_value = np.max(image_cube_gA)

        # 'mask is not None' instead of 'if mask': the truth value of a multi-element array is ambiguous
        if mask is not None:
            image_cube_gA[mask] = fill_value
        elif nodataVal is not None:
            image_cube_gA[image_cube_gA[:] == nodataVal] = fill_value
        else:
            image_cube_gA[image_cube_gA.mask_nodata.astype(np.int8) == 0] = fill_value

        cmap = super(SAM_Classifier, self).classify(image_cube_gA, nodataVal=nodataVal, tiledims=tiledims)

        if mask is not None:
            cmap[mask] = -9999

        return cmap
......@@ -239,13 +239,13 @@ class Test_SpectralHomogenizer(unittest.TestCase):
cfg = set_config(job_ID=26186196, db_host=db_host, reset_status=True, is_test=True)
cls.SpH = SpectralHomogenizer(classifier_rootDir=cfg.path_spechomo_classif)
# cls.testArr_L8 = GeoArray(np.random.randint(1, 10000, (50, 50, 7), dtype=np.int16)) # no band 9, no pan
cls.testArr_L8 = GeoArray(np.random.randint(1, 10000, (50, 50, 7), dtype=np.int16)) # no band 9, no pan
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/'
# 'Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B__250x250.bsq') # no pan
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/'
# 'Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B.bsq') # no pan
cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/'
'clusterhomo_sourceL8_full_withoutB9.bsq') # no pan, no cirrus
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/'
# 'clusterhomo_sourceL8_full_withoutB9.bsq') # no pan, no cirrus
# cls.cwl_L8 = [442.98, 482.59, 561.33, 654.61, 864.57, 1609.09, 2201.25]
cls.cwl_L8 = [442.98, 482.59, 561.33, 654.61, 864.57, 1373.48, 1609.09, 2201.25]
......@@ -344,7 +344,7 @@ class Test_SpectralHomogenizer(unittest.TestCase):
class Test_SAM_Classifier(unittest.TestCase):
def test_classify(self):
from gms_preprocessing.algorithms.L2B_P import SAM_Classifier
from gms_preprocessing.algorithms.classification import SAM_Classifier
gA = GeoArray('/home/gfz-fe/scheffler/temp/Landsat-7__ETM+__LE71920242016104NSG00_image_data_L1A.bsq')
with open('/home/gfz-fe/scheffler/temp/SPECHOM_py/QR_clust50__Landsat-7__ETM+.dill', 'rb') as inF:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment