Commit f31273d0 authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

RefCubes are now saved as integer arrays. Added wavelengths to RefCube metadata.

Added pysptools to dependencies.
parent c9de3d0c
......@@ -32,7 +32,7 @@ test_gms_preprocessing:
# - python setup.py install
# - cd ../../
# make tests
- pip install nested_dict openpyxl timeout_decorator redis retools redis-semaphore psutil # FIXME: remove as soon as runner is rebuilt
- pip install nested_dict openpyxl timeout_decorator redis retools redis-semaphore psutil pysptools # FIXME: remove as soon as runner is rebuilt
- make nosetests
- make docs
artifacts:
......
......@@ -31,7 +31,6 @@ from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline, Pipeline # noqa F401 # flake8 issue
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from pysptools.classification import SAM
from numba import jit
from geoarray import GeoArray # noqa F401 # flake8 issue
from ..options.config import GMS_config as CFG
......@@ -851,7 +850,7 @@ class ReferenceCube_Generator(object):
src_im = GeoArray(im)
# get random spectra of the original (hyperspectral) image, equally distributed over all computed clusters
unif_random_spectra = self.cluster_image_and_get_uniform_spectra(src_im, progress=progress)
unif_random_spectra = self.cluster_image_and_get_uniform_spectra(src_im, progress=progress).astype(np.int16)
# resample the set of random spectra to match the spectral characteristics of all target sensors
for tgt_sat, tgt_sen in self.tgt_sat_sen_list:
......@@ -898,7 +897,7 @@ class ReferenceCube_Generator(object):
if downsamp_sat and downsamp_sen:
tgt_srf = SRF(GMS_identifier(satellite=downsamp_sat, sensor=downsamp_sen, subsystem=None, image_type='RSD',
dataset_ID=-9999, proc_level='L1A', logger=self.logger))
im2clust = self.resample_image_spectrally(im2clust, tgt_srf, progress=progress)
im2clust = self.resample_image_spectrally(im2clust, tgt_srf, progress=progress) # output = int16
# compute KMeans clusters for the spectrally resampled image
self.logger.info('Computing %s KMeans clusters from the input image %s...'
......@@ -986,9 +985,10 @@ class RefCube(object):
"""
# privates
self._col_imName_dict = dict()
self._wavelenths = []
# defaults
self.data = GeoArray(np.empty((0, 0, 0))) # type: GeoArray
self.data = GeoArray(np.empty((0, 0, len(LayerBandsAssignment) if LayerBandsAssignment else 0)))
self.srcImNames = []
# args/ kwargs
......@@ -996,10 +996,20 @@ class RefCube(object):
self.satellite = satellite
self.sensor = sensor
self.LayerBandsAssignment = LayerBandsAssignment or []
# TODO add CWLs and FWHMs (allows direct comparison of multi-sensor refcubes in ENVI)
if filepath:
self.from_filepath(filepath)
self.read_data_from_disk(filepath)
if self.satellite and self.sensor and self.LayerBandsAssignment:
self._add_bandnames_wavelenghts_to_meta()
def _add_bandnames_wavelenghts_to_meta(self):
# set bandnames
self.data.bandnames = ['Band %s' % b for b in self.LayerBandsAssignment]
# set wavelengths
self.data.metadata.loc['wavelength'] = self.wavelengths
# TODO: NOTE: wavelengths are not written to ENVI header because currently GeoArray does not support that
@property
def n_images(self):
......@@ -1035,6 +1045,19 @@ class RefCube(object):
self._col_imName_dict = col_imName_dict
self.srcImNames = list(col_imName_dict.values())
@property
def wavelengths(self):
if not self._wavelenths and self.satellite and self.sensor and self.LayerBandsAssignment:
srf = SRF(GMS_identifier(satellite=self.satellite, sensor=self.sensor, subsystem=None, image_type='RSD',
dataset_ID=-9999, proc_level='L1A', logger=None),
no_thermal=True, no_pan=False)
self._wavelenths = [srf.conv[b] for b in self.LayerBandsAssignment]
return self._wavelenths
@wavelengths.setter
def wavelengths(self, wavelengths):
self._wavelenths = wavelengths
def add_refcube_array(self, refcube_array, src_imnames, LayerBandsAssignment):
# type: (Union[str, np.ndarray], list, list) -> None
"""Add the given given array to the RefCube instance.
......@@ -1093,7 +1116,7 @@ class RefCube(object):
def metadata(self):
"""Return an ordered dictionary holding the metadata of the reference cube."""
attrs2include = ['satellite', 'sensor', 'filepath', 'n_signatures', 'n_images', 'n_clusters',
'n_signatures_per_cluster', 'col_imName_dict', 'LayerBandsAssignment']
'n_signatures_per_cluster', 'col_imName_dict', 'LayerBandsAssignment', 'wavelengths']
return OrderedDict((k, getattr(self, k)) for k in attrs2include)
def get_band_combination(self, tgt_LBA):
......@@ -1120,8 +1143,7 @@ class RefCube(object):
self.data = self.get_band_combination(tgt_LBA)
self.LayerBandsAssignment = tgt_LBA
def from_filepath(self, filepath):
# TODO convert to class method
def read_data_from_disk(self, filepath):
self.data = GeoArray(filepath)
with open(os.path.splitext(filepath)[0] + '.meta', 'r') as metaF:
......@@ -1209,7 +1231,10 @@ class Classifier_Generator(object):
'QR': Quadratic Regression
:param kwargs: keyword arguments to be passed to the __init__() function of machine learners
"""
# train the model
###################
# train the model #
###################
ML = get_machine_learner(method, **kwargs)
ML.fit(train_X, train_Y)
......@@ -1217,7 +1242,10 @@ class Classifier_Generator(object):
y_true, y_pred = np.array(y_true), np.array(y_pred)
return np.mean(np.abs((y_true - y_pred) / y_true), axis=0) * 100
# compute RMSE, MAE, MAPE
###########################
# compute RMSE, MAE, MAPE #
###########################
from sklearn.metrics import mean_squared_error, mean_absolute_error
predicted = ML.predict(test_X) # returns 2D array (spectral samples x bands), e.g. 640 x 6
# NOTE: 'raw_values': RMSE is column-wise computed
......
......@@ -21,3 +21,4 @@ redis
retools
redis-semaphore
psutil
pysptools
......@@ -19,7 +19,7 @@ requirements = [
'matplotlib', 'numpy', 'scikit-learn', 'scipy', 'gdal', 'pyproj', 'shapely', 'ephem', 'pyorbital', 'dill', 'pytz',
'pandas', 'numba', 'spectral>=0.16', 'geopandas', 'iso8601', 'pyinstrument', 'geoalchemy2', 'sqlalchemy',
'psycopg2', 'py_tools_ds>=0.12.4', 'geoarray>=0.7.13', 'arosics>=0.6.6', 'six', 'tqdm', 'jsmin', 'cerberus',
'nested_dict', 'openpyxl', 'timeout_decorator', 'redis', 'retools', 'redis-semaphore', 'psutil'
'nested_dict', 'openpyxl', 'timeout_decorator', 'redis', 'retools', 'redis-semaphore', 'psutil', 'pysptools'
# spectral<0.16 has some problems with writing signed integer 8bit data
# fmask # conda install -c conda-forge python-fmask
# 'pyhdf', # conda install --yes -c conda-forge pyhdf
......
......@@ -78,6 +78,7 @@ dependencies:
- retools
- redis-semaphore
- psutil
- pysptools
- py_tools_ds>=0.12.4
- geoarray>=0.7.13
- arosics>=0.6.6
......
......@@ -17,7 +17,6 @@ from geoarray import GeoArray
import gms_preprocessing
from gms_preprocessing import set_config # noqa E402 module level import not at top of file
# from gms_preprocessing.algorithms.L2B_P import ReferenceCube_Generator_OLD # noqa E402 module level import not at top of file
from gms_preprocessing.algorithms.L2B_P import ReferenceCube_Generator # noqa E402 module level import not at top of file
from gms_preprocessing.algorithms.L2B_P import RefCube # noqa E402 module level import not at top of file
from gms_preprocessing.algorithms.L2B_P import SpectralHomogenizer # noqa E402 module level import not at top of file
......@@ -35,41 +34,6 @@ refcube_l5 = os.path.join(gms_preprocessing.__path__[0],
'../tests/data/spechomo_inputs/refcube__Landsat-5__TM__nclust50__nsamp100.bsq')
# class Test_ReferenceCube_Generator_OLD(unittest.TestCase):
# """Tests class for gms_preprocessing.algorithms.L2B_P.SpecHomo_Classifier"""
#
# @classmethod
# def setUpClass(cls):
# # Testjob Landsat-8
# set_config(job_ID=26186196, db_host=db_host, reset_status=True, is_test=True)
# cls.SHC = ReferenceCube_Generator_OLD([testdata, testdata, ], v=False)
#
# def test_generate_reference_cube_L8(self):
# ref_cube = self.SHC.generate_reference_cube('Landsat-8', 'OLI_TIRS', n_clusters=10, tgt_n_samples=1000)
# self.assertIsInstance(ref_cube, np.ndarray)
# self.assertTrue(np.any(ref_cube), msg='Reference cube for Landsat-8 is empty.')
#
# def test_generate_reference_cube_S2A(self):
# ref_cube = self.SHC.generate_reference_cube('Sentinel-2A', 'MSI', n_clusters=10, tgt_n_samples=1000)
# self.assertIsInstance(ref_cube, np.ndarray)
# self.assertTrue(np.any(ref_cube), msg='Reference cube for Sentinel-2A is empty.')
#
# @unittest.SkipTest
# def test_generate_reference_cube_AST(self):
# ref_cube = self.SHC.generate_reference_cube('Terra', 'ASTER', n_clusters=10, tgt_n_samples=1000)
# self.assertIsInstance(ref_cube, np.ndarray)
#
# def test_multiprocessing(self):
# SHC = ReferenceCube_Generator_OLD([testdata, testdata, ], v=False, CPUs=1)
# ref_cube_sp = SHC.generate_reference_cube('Landsat-8', 'OLI_TIRS', n_clusters=10, tgt_n_samples=1000)
#
# SHC = ReferenceCube_Generator_OLD([testdata, testdata, ], v=False, CPUs=None)
# ref_cube_mp = SHC.generate_reference_cube('Landsat-8', 'OLI_TIRS', n_clusters=10, tgt_n_samples=1000)
#
# self.assertTrue(np.any(ref_cube_sp), msg='Singleprocessing result is empty.')
# self.assertTrue(np.any(ref_cube_mp), msg='Multiprocessing result is empty.')
class Test_ReferenceCube_Generator(unittest.TestCase):
"""Tests class for gms_preprocessing.algorithms.L2B_P.SpecHomo_Classifier"""
......@@ -126,6 +90,7 @@ class Test_ReferenceCube_Generator(unittest.TestCase):
self.assertIsInstance(refcubes, dict)
self.assertIsInstance(refcubes[('Landsat-8', 'OLI_TIRS')], RefCube)
self.assertEqual(refcubes[('Landsat-8', 'OLI_TIRS')].data.shape, (self.tgt_n_samples, len(self.testIms), 9))
self.assertNotEqual(len(os.listdir(self.tmpOutdir.name)), 0)
# @unittest.SkipTest
# def test_multiprocessing(self):
......@@ -275,8 +240,10 @@ class Test_SpectralHomogenizer(unittest.TestCase):
cls.SpH = SpectralHomogenizer(classifier_rootDir=cfg.path_spechomo_classif)
# cls.testArr_L8 = GeoArray(np.random.randint(1, 10000, (50, 50, 7), dtype=np.int16)) # no band 9, no pan
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B__250x250.bsq') # no pan
cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B.bsq') # no pan
# cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/'
# 'Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B__250x250.bsq') # no pan
cls.testArr_L8 = GeoArray('/home/gfz-fe/scheffler/temp/'
'Landsat-8__OLI_TIRS__LC81940242014072LGN00_L2B.bsq') # no pan
# cls.cwl_L8 = [442.98, 482.59, 561.33, 654.61, 864.57, 1609.09, 2201.25]
cls.cwl_L8 = [442.98, 482.59, 561.33, 654.61, 864.57, 1373.48, 1609.09, 2201.25]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment