Commit cebe4b0f authored by Daniel Scheffler's avatar Daniel Scheffler
Browse files

Further developed L2B_P.SpecHomo_Classifier. GMSLogger is now picklable....

Further developed L2B_P.SpecHomo_Classifier. GMSLogger is now picklable. Updated minimum versions of geoarray.


Former-commit-id: b6c7e883
parent b1bfe70a
......@@ -13,6 +13,7 @@ from sklearn.cluster import KMeans
from pandas import DataFrame
from typing import Union # noqa F401 # flake8 issue
from tqdm import tqdm
from multiprocessing import Pool
from sklearn.cluster import k_means_ # noqa F401 # flake8 issue
from geoarray import GeoArray # noqa F401 # flake8 issue
......@@ -20,6 +21,7 @@ from py_tools_ds.numeric.array import get_array_tilebounds
from ..config import GMS_config as CFG
from ..io.input_reader import SRF # noqa F401 # flake8 issue
from ..misc.logging import GMS_logger
from .L2A_P import L2A_object
__author__ = 'Daniel Scheffler'
......@@ -149,7 +151,7 @@ class SpectralResampler(object):
specval_rsp = np.average(spectrum_1nm, weights=self.srf_1nm[band])
if v:
plt.plot(self.wvl_1nm, self.srf_1nm/max(self.srf_1nm))
plt.plot(self.wvl_1nm, self.srf_1nm[band]/max(self.srf_1nm[band]))
plt.plot(wvl_center, specval_rsp/scale_factor, 'x', color='r')
spectrum_rsp.append(specval_rsp)
......@@ -331,10 +333,11 @@ class SpecHomo_Classifier(object):
"""
self.ims_ref = filelist_refs
self.v = v
self.logger = logger or logging.getLogger(__name__)
self.logger = logger or GMS_logger(__name__) # must be pickable
def generate_reference_cube(self, tgt_satellite, tgt_sensor, n_clusters=10, tgt_n_samples=1000):
# type: (str, str, int, int) -> np.ndarray
def generate_reference_cube(self, tgt_satellite, tgt_sensor, n_clusters=10, tgt_n_samples=1000, path_out='',
fmt_out=''):
# type: (str, str, int, int, str, str) -> np.ndarray
"""Generate reference spectra from all hyperspectral input images.
The hyperspectral images are spectrally resampled to the target sensor specifications. The resulting target
......@@ -346,6 +349,8 @@ class SpecHomo_Classifier(object):
:param tgt_sensor: target sensor, e.g. 'OLI_TIRS'
:param n_clusters: number of clusters to be used for clustering the input images (KMeans)
:param tgt_n_samples: number of spectra to be collected from each input image
:param path_out: output path for the generated reference cube
:param fmt_out: output format (GDAL driver code)
:return: np.array: [tgt_n_samples x images x spectral bands of the target sensor]
"""
self.logger.info('Generating reference spectra from all input images...')
......@@ -359,52 +364,56 @@ class SpecHomo_Classifier(object):
tgt_srf.plot_srfs()
# generate random spectra samples equally for each KMeans cluster
random_samples_all_ims = dict()
with Pool(processes=len(self.ims_ref)) as pool:
args = [(im, tgt_srf, n_clusters, tgt_n_samples) for im in self.ims_ref]
random_samples_all_ims_list = pool.starmap(self._get_uniform_random_samples, args)
# Build the reference cube from the random samples of each image
# => rows: tgt_n_samples, columns: images, bands: spectral information
self.logger.info('Combining randomly sampled spectra to a single reference cube...')
ref_cube = np.hstack(random_samples_all_ims_list)
for im_ref in self.ims_ref:
im_name = os.path.basename(im_ref)
# save
if path_out:
GeoArray(ref_cube).save(out_path=path_out, fmt=fmt_out)
# read input image
self.logger.info('Reading the input image %s...' % im_name)
im_gA = GeoArray(im_ref)
im_gA.cwl = np.array(im_gA.meta.loc['wavelength'], dtype=np.float).flatten()
# im_gA.to_mem()
wvl_unit = 'nanometers' if max(im_gA.cwl) > 15 else 'micrometers'
return ref_cube
# perform spectral resampling of input image to match spectral properties of target sensor
self.logger.info('Performing spectral resampling to match spectral properties of target sensor...')
SR = SpectralResampler(im_gA.cwl, tgt_srf, wvl_unit=wvl_unit)
def _get_uniform_random_samples(self, im_ref, tgt_srf, n_clusters, tgt_n_samples):
im_name = os.path.basename(im_ref)
im_tgt = np.empty((*im_gA.shape[:2], len(tgt_srf.bands)))
for ((rS, rE), (cS, cE)), tiledata in tqdm(im_gA.tiles((1000, 1000)), total=900):
im_tgt[rS: rE + 1, cS: cE + 1, :] = SR.resample_image(tiledata)
im_tgt = GeoArray(im_tgt, im_gA.gt, im_gA.prj)
# read input image
self.logger.info('Reading the input image %s...' % im_name)
im_gA = GeoArray(im_ref)
im_gA.cwl = np.array(im_gA.meta.loc['wavelength'], dtype=np.float).flatten()
# im_gA.to_mem()
wvl_unit = 'nanometers' if max(im_gA.cwl) > 15 else 'micrometers'
# compute KMeans clusters for the spectrally resampled image
self.logger.info('Computing %s KMeans clusters...' % n_clusters)
kmeans = KMeansRSImage(im_tgt, n_clusters=n_clusters)
# perform spectral resampling of input image to match spectral properties of target sensor
self.logger.info('Performing spectral resampling to match spectral properties of target sensor...')
SR = SpectralResampler(im_gA.cwl, tgt_srf, wvl_unit=wvl_unit)
if self.v:
kmeans.plot_cluster_centers()
kmeans.plot_cluster_histogram()
im_tgt = np.empty((*im_gA.shape[:2], len(tgt_srf.bands)))
for ((rS, rE), (cS, cE)), tiledata in tqdm(im_gA.tiles((1000, 1000))):
im_tgt[rS: rE + 1, cS: cE + 1, :] = SR.resample_image(tiledata)
im_tgt = GeoArray(im_tgt, im_gA.gt, im_gA.prj)
# randomly grab the given number of spectra from each cluster
self.logger.info('Getting %s random spectra from each cluster...' % (tgt_n_samples//n_clusters))
random_samples = kmeans.get_random_spectra_from_each_cluster(samplesize=tgt_n_samples//n_clusters)
# compute KMeans clusters for the spectrally resampled image
self.logger.info('Computing %s KMeans clusters...' % n_clusters)
kmeans = KMeansRSImage(im_tgt, n_clusters=n_clusters)
# combine the spectra (2D arrays) of all clusters to a single 2D array
random_samples = np.vstack([random_samples[clusterlabel] for clusterlabel in random_samples])
if self.v:
kmeans.plot_cluster_centers()
kmeans.plot_cluster_histogram()
# reshape it so that we have the spectral information in 3rd dimension (x rows, 1 column and z bands)
random_samples = random_samples.reshape((random_samples.shape[0], 1, random_samples.shape[1]))
# randomly grab the given number of spectra from each cluster
self.logger.info('Getting %s random spectra from each cluster...' % (tgt_n_samples // n_clusters))
random_samples = kmeans.get_random_spectra_from_each_cluster(samplesize=tgt_n_samples // n_clusters)
# assign the result to the dict of all images
random_samples_all_ims[im_name] = random_samples
# combine the spectra (2D arrays) of all clusters to a single 2D array
random_samples = np.vstack([random_samples[clusterlabel] for clusterlabel in random_samples])
# Build the reference cube from the random samples of each image
# => rows: tgt_n_samples, columns: images, bands: spectral information
self.logger.info('Combining randomly sampled spectra to a single reference cube...')
im_names = [os.path.basename(p) for p in self.ims_ref]
ref_cube = np.hstack([random_samples_all_ims[imN] for imN in im_names])
# reshape it so that we have the spectral information in 3rd dimension (x rows, 1 column and z bands)
random_samples = random_samples.reshape((random_samples.shape[0], 1, random_samples.shape[1]))
return ref_cube
return random_samples
......@@ -28,6 +28,12 @@ class GMS_logger(logging.Logger):
# private attributes
self._captured_stream = ''
self.name_logfile = name_logfile
self.fmt_suffix = fmt_suffix
self.path_logfile = path_logfile
self.log_level = log_level
self.appendMode = append
super(GMS_logger, self).__init__(name_logfile)
self.path_logfile = path_logfile
......@@ -100,6 +106,17 @@ class GMS_logger(logging.Logger):
# logger.addHandler(logfileHandler)
# logger.addHandler(consoleHandler_out)
def __getstate__(self):
    """Define how this logger is pickled.

    Calls self.close() before handing out the instance dict — presumably to
    release/detach the attached stream and file handlers, which are not
    picklable (TODO confirm against GMS_logger.close()).

    :return: the instance __dict__ used as the pickled state
    """
    self.close()
    return self.__dict__
def __setstate__(self, ObjDict):
    """Define how the attributes of GMS_logger are unpickled.

    Re-runs __init__ with the pickled constructor arguments so that the
    handlers (dropped by __getstate__) are freshly created; append=True is
    forced so the restored logger continues an existing logfile instead of
    truncating it.

    :param ObjDict: the pickled instance __dict__ produced by __getstate__
    """
    self.__init__(ObjDict['name_logfile'], fmt_suffix=ObjDict['fmt_suffix'], path_logfile=ObjDict['path_logfile'],
                  log_level=ObjDict['log_level'], append=True)
    # NOTE: the previous implementation rebound a local ('ObjDict = self.__dict__')
    # and returned it — pickle ignores __setstate__'s return value, so that was
    # dead code and has been removed.
@property
def captured_stream(self):
if not self._captured_stream:
......
py_tools_ds>=0.10.0
geoarray>=0.7.0
geoarray>=0.7.1
arosics>=0.6.6
git+https://gitext.gfz-potsdam.de/EnMAP/sicor.git
matplotlib
......
......@@ -14,7 +14,7 @@ with open('HISTORY.rst') as history_file:
requirements = [
'matplotlib', 'numpy', 'scikit-learn', 'scipy', 'gdal', 'pyproj', 'shapely', 'ephem', 'pyorbital', 'dill', 'pytz',
'pandas', 'numba', 'spectral>=0.16', 'geopandas', 'iso8601', 'pyinstrument', 'geoalchemy2', 'sqlalchemy',
'psycopg2', 'py_tools_ds>=0.10.0', 'geoarray>=0.7', 'arosics>=0.6.6', 'six'
'psycopg2', 'py_tools_ds>=0.10.0', 'geoarray>=0.7.1', 'arosics>=0.6.6', 'six'
# spectral<0.16 has some problems with writing signed integer 8bit data
# fmask # conda install -c conda-forge python-fmask
# 'pyhdf', # conda install --yes -c conda-forge pyhdf
......
......@@ -18,7 +18,7 @@ RUN /bin/bash -i -c "source /root/anaconda3/bin/activate ; \
conda install --yes -c conda-forge 'icu=58.*' lxml ; \
pip install pandas geopandas dicttoxml jsmin cerberus pyprind pint iso8601 tqdm mpld3 sphinx-argparse dill pytz \
spectral>0.16 psycopg2 pyorbital pyinstrument geoalchemy2 sqlalchemy py_tools_ds>=0.10.0 \
geoarray>=0.7.0 arosics>=0.6.6 flake8 pycodestyle pylint pydocstyle nose nose2 nose-htmloutput \
geoarray>=0.7.1 arosics>=0.6.6 flake8 pycodestyle pylint pydocstyle nose nose2 nose-htmloutput \
coverage rednose six" # must include all the requirements needed to build the docs!
# copy some needed stuff to /root
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment