Commit 7ecd44b0 authored by Niklas Bohn

Implemented segmentation and significantly updated code.

parent 8a6cafba
@@ -25,11 +25,11 @@
import numpy as np
from numpy import logical_and as aand
import dill
from tqdm import tqdm
from os.path import split, abspath
import scipy as s
from scipy import logical_and as aand
from scipy.interpolate import interp1d
from scipy.io import loadmat
from scipy.linalg import norm
@@ -44,17 +44,15 @@ from sicor.Tools.EnMAP.metadata import varsol
class MultiComponentSurface(object):
"""A model of the surface based on a collection of multivariate
Gaussians, with one or more equiprobable components and full
covariance matrices.
"""
A model of the surface based on a collection of multivariate Gaussians, with one or more equiprobable components
and full covariance matrices.
To evaluate the probability of a new spectrum, we calculate the
Mahalanobis distance to each component cluster, and use that as our
Multivariate Gaussian surface model.
To evaluate the probability of a new spectrum, we calculate the Mahalanobis distance to each component cluster,
and use that as our Multivariate Gaussian surface model.
"""
def __init__(self, config):
"""."""
# Models are stored as dictionaries in .mat format
config = json_load_ascii(config, shell_replace=True)
@@ -69,55 +67,48 @@ class MultiComponentSurface(object):
if self.normalize == 'Euclidean':
self.e_norm = lambda r: norm(r)
elif self.normalize == 'RMS':
self.e_norm = lambda r: s.sqrt(s.mean(pow(r, 2)))
self.e_norm = lambda r: np.sqrt(np.mean(pow(r, 2)))
elif self.normalize == 'None':
self.e_norm = lambda r: 1.0
else:
raise ValueError('Unrecognized Normalization: %s\n' %
self.normalize)
raise ValueError('Unrecognized Normalization: %s\n' % self.normalize)
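As a quick, hedged illustration of the three normalization modes (not part of the module itself):

import numpy as np

r = np.array([0.1, 0.2, 0.3])
e_euclid = np.sqrt(np.sum(r ** 2))  # 'Euclidean': vector length
e_rms = np.sqrt(np.mean(r ** 2))    # 'RMS': length scaled by 1/sqrt(n)
e_none = 1.0                        # 'None': leave spectra unscaled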
self.selection_metric = 'Mahalanobis'
# This field, if present and set to true, forces us to use
# any initialization state and never change. The state is
# preserved in the geometry object so that this object stays
# stateless
# This field, if present and set to true, forces us to use any initialization state and never change it.
# The state is preserved in the geometry object so that this object stays stateless.
self.select_on_init = False
# Reference values are used for normalizing the reflectances.
# in the VSWIR regime, reflectances are normalized so that the model
# is agnostic to absolute magnitude.
self.refwl = s.squeeze(model_dict['refwl'])
self.idx_ref = [s.argmin(abs(self.wl-w))
for w in s.squeeze(self.refwl)]
self.idx_ref = s.array(self.idx_ref)
# Reference values are used for normalizing the reflectances. In the VSWIR regime, reflectances are normalized
# so that the model is agnostic to absolute magnitude.
self.refwl = np.squeeze(model_dict['refwl'])
self.idx_ref = [np.argmin(abs(self.wl-w)) for w in np.squeeze(self.refwl)]
self.idx_ref = np.array(self.idx_ref)
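The nearest-channel lookup above can be sanity-checked in isolation; the wavelengths here are hypothetical:

import numpy as np

wl = np.array([450., 550., 650., 750.])  # channel centers (hypothetical)
refwl = np.array([460., 740.])           # reference wavelengths
idx_ref = np.array([np.argmin(abs(wl - w)) for w in refwl])
# idx_ref -> array([0, 3]): each reference wavelength maps to its nearest channel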
# Cache some important computations
self.Covs, self.Cinvs, self.mus = [], [], []
for i in range(self.n_comp):
Cov = self.components[i][1]
self.Covs.append(s.array([Cov[j, self.idx_ref]
for j in self.idx_ref]))
self.Covs.append(np.array([Cov[j, self.idx_ref] for j in self.idx_ref]))
self.Cinvs.append(svd_inv(self.Covs[-1]))
self.mus.append(self.components[i][0][self.idx_ref])
# Variables retrieved: each channel maps to a reflectance model parameter
rmin, rmax = 0, 10.0
self.statevec = ['RFL_%04i' % int(w) for w in self.wl]
self.bounds = [[rmin, rmax] for w in self.wl]
self.scale = [1.0 for w in self.wl]
self.init = [0.15 * (rmax-rmin)+rmin for v in self.wl]
self.idx_lamb = s.arange(self.n_wl)
self.bounds = [[rmin, rmax]]
self.scale = [1.0]
self.init = [0.15 * (rmax-rmin)+rmin]
self.idx_lamb = np.arange(self.n_wl)
self.n_state = len(self.statevec)
def component(self, x):
"""We pick a surface model component using the Mahalanobis distance.
"""
We pick a surface model component using the Mahalanobis distance.
This always uses the Lambertian (non-specular) version of the
surface reflectance. If the forward model initialize via heuristic
(i.e. algebraic inversion), the component is only calculated once
based on that first solution. That state is preserved in the
geometry object.
This always uses the Lambertian (non-specular) version of the surface reflectance. If the forward model
initializes via heuristic (i.e., algebraic inversion), the component is only calculated once based on that first
solution. That state is preserved in the geometry object.
"""
x_surface = x
@@ -137,15 +128,16 @@ class MultiComponentSurface(object):
else:
md = sum(pow(lamb_ref - ref_mu, 2))
mds.append(md)
closest = s.argmin(mds)
closest = np.argmin(mds)
return closest
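A minimal, self-contained sketch of this selection rule, with mus and Cinvs standing in for the cached component means and inverse covariances built in __init__:

import numpy as np

def closest_component(lamb_ref, mus, Cinvs):
    # squared Mahalanobis distance of the (normalized) reflectance to each
    # component mean; the component with the smallest distance wins
    mds = [(lamb_ref - mu) @ Cinv @ (lamb_ref - mu)
           for mu, Cinv in zip(mus, Cinvs)]
    return int(np.argmin(mds))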
def xa(self, x_surface):
"""Mean of prior distribution, calculated at state x. We find
the covariance in a normalized space (normalizing by z) and then un-
normalize the result for the calling function. This always uses the
Lambertian (non-specular) version of the surface reflectance."""
"""
Mean of prior distribution, calculated at state x. We find the covariance in a normalized space
(normalizing by z) and then unnormalize the result for the calling function. This always uses the Lambertian
(non-specular) version of the surface reflectance.
"""
lamb = self.calc_lamb(x_surface)
lamb_ref = lamb[self.idx_ref]
@@ -157,9 +149,10 @@ class MultiComponentSurface(object):
return mu
def Sa(self, x_surface):
"""Covariance of prior distribution, calculated at state x. We find
the covariance in a normalized space (normalizing by z) and then un-
normalize the result for the calling function."""
"""
Covariance of prior distribution, calculated at state x. We find the covariance in a normalized space
(normalizing by z) and then unnormalize the result for the calling function.
"""
lamb = self.calc_lamb(x_surface)
lamb_ref = lamb[self.idx_ref]
@@ -170,13 +163,14 @@ class MultiComponentSurface(object):
return Cov
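Both xa() and Sa() follow the same pattern: compute the norm z of the current reflectance estimate at the reference channels, then rescale the stored component statistics, linearly for the mean and quadratically for the covariance. A hedged sketch (function name hypothetical):

import numpy as np

def unnormalize(mu_norm, Cov_norm, lamb_ref, e_norm):
    z = e_norm(lamb_ref)                  # magnitude of the current estimate
    return mu_norm * z, Cov_norm * z ** 2

mu, Cov = unnormalize(np.array([0.5, 1.0]), np.eye(2) * 1e-4,
                      np.array([0.1, 0.2]), lambda r: np.sqrt(np.mean(r ** 2)))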
def calc_lamb(self, x_surface):
"""Lambertian reflectance."""
"""
Lambertian reflectance.
"""
return x_surface[self.idx_lamb]
def surface_model(config, logger=None):
"""."""
configdir, configfile = split(abspath(config))
config = json_load_ascii(config, shell_replace=True)
@@ -192,7 +186,7 @@ def surface_model(config, logger=None):
outfile = expand_path(configdir, config['output_model_file'])
# load wavelengths file
q = s.loadtxt(wavelength_file)
q = np.loadtxt(wavelength_file)
if q.shape[1] > 2:
q = q[:, 1:]
if q[0, 0] < 100:
@@ -205,27 +199,19 @@ def surface_model(config, logger=None):
for wi, window in enumerate(reference_windows):
active_wl = aand(wl >= window[0], wl < window[1])
refwl.extend(wl[active_wl])
normind = s.array([s.argmin(abs(wl - w)) for w in refwl])
refwl = s.array(refwl, dtype=float)
normind = np.array([np.argmin(abs(wl - w)) for w in refwl])
refwl = np.array(refwl, dtype=float)
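The window masking reduces to a couple of numpy operations; a sketch with hypothetical channel centers:

import numpy as np
from numpy import logical_and as aand

wl = np.arange(400., 2501., 10.)                     # channel centers in nm (hypothetical)
reference_windows = [[400., 1300.], [1450., 1700.]]
refwl = []
for window in reference_windows:
    refwl.extend(wl[aand(wl >= window[0], wl < window[1])])
normind = np.array([np.argmin(abs(wl - w)) for w in refwl])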
# create basic model template
model = {
'normalize': normalize,
'wl': wl,
'means': [],
'covs': [],
'refwl': refwl
}
model = {'normalize': normalize, 'wl': wl, 'means': [], 'covs': [], 'refwl': refwl}
for si, source_config in enumerate(config['sources']):
# Determine source parameters
# determine source parameters
for q in ['input_spectrum_files', 'windows', 'n_components']:
if q not in source_config:
raise ValueError(
'Source %i is missing a parameter: %s' % (si, q))
raise ValueError('Source %i is missing a parameter: %s' % (si, q))
# Determine whether we should synthesize our own mixtures
# determine whether we should synthesize our own mixtures
if 'mixtures' in source_config:
mixtures = source_config['mixtures']
elif 'mixtures' in config:
@@ -233,8 +219,7 @@ def surface_model(config, logger=None):
else:
mixtures = 0
infiles = [expand_path(configdir, fi) for fi in
source_config['input_spectrum_files']]
infiles = [expand_path(configdir, fi) for fi in source_config['input_spectrum_files']]
ncomp = int(source_config['n_components'])
windows = source_config['windows']
@@ -245,19 +230,18 @@ def surface_model(config, logger=None):
logger.info("Loading " + infile + "...")
hdrfile = infile + '.hdr'
rfl = envi.open(hdrfile, infile)
nl, nb, ns = [int(rfl.metadata[n])
for n in ('lines', 'bands', 'samples')]
swl = s.array([float(f) for f in rfl.metadata['wavelength']])
nl, nb, ns = [int(rfl.metadata[n]) for n in ('lines', 'bands', 'samples')]
swl = np.array([float(f) for f in rfl.metadata['wavelength']])
# Maybe convert to nanometers
# maybe convert to nanometers
if swl[0] < 100:
swl = swl * 1000.0
rfl_mm = rfl.open_memmap(interleave='source', writable=True)
if rfl.metadata['interleave'] == 'bip':
x = s.array(rfl_mm[:, :, :])
if rfl.metadata['interleave'] == 'bil':
x = s.array(rfl_mm[:, :, :]).transpose((0, 2, 1))
x = np.array(rfl_mm[:, :, :])
else:
x = np.array(rfl_mm[:, :, :]).transpose((0, 2, 1))
x = x.reshape(nl * ns, nb)
# import spectra and resample
@@ -273,25 +257,24 @@ def surface_model(config, logger=None):
s2, m2 = spectra[int(s.rand() * n)], 1.0 - m1
spectra.append(m1 * s1 + m2 * s2)
spectra = s.array(spectra)
use = s.all(s.isfinite(spectra), axis=1)
spectra = np.array(spectra)
use = np.all(np.isfinite(spectra), axis=1)
spectra = spectra[use, :]
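The mixture synthesis referenced above draws random convex combinations of library spectra; a self-contained sketch with stand-in data:

import numpy as np

rng = np.random.default_rng(0)
spectra = [rng.random(5) for _ in range(10)]  # stand-in library spectra
for _ in range(3):                            # synthesize three mixtures
    n = len(spectra)
    m1 = rng.random()
    s1 = spectra[rng.integers(n)]
    s2, m2 = spectra[rng.integers(n)], 1.0 - m1
    spectra.append(m1 * s1 + m2 * s2)         # mixing weights sum to one
spectra = np.array(spectra)
spectra = spectra[np.all(np.isfinite(spectra), axis=1), :]  # drop non-finite rows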
# accumulate total list of window indices
window_idx = -s.ones((nchan), dtype=int)
window_idx = -np.ones(nchan, dtype=int)
for wi, win in enumerate(windows):
active_wl = aand(wl >= win['interval'][0], wl < win['interval'][1])
window_idx[active_wl] = wi
# Two step model. First step is k-means initialization
# two step model. First step is k-means initialization
kmeans = KMeans(init='k-means++', n_clusters=ncomp, n_init=10)
kmeans.fit(spectra)
Z = kmeans.predict(spectra)
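Step one of the two-step model in isolation, with random stand-in spectra:

import numpy as np
from sklearn.cluster import KMeans

spectra = np.random.rand(500, 20)                 # stand-in spectra
kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10).fit(spectra)
Z = kmeans.predict(spectra)                       # component label per spectrum
m0 = np.mean(spectra[Z == 0, :], axis=0)          # per-component mean ...
C0 = np.cov(spectra[Z == 0, :], rowvar=False)     # ... and covariance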
for ci in range(ncomp):
m = s.mean(spectra[Z == ci, :], axis=0)
C = s.cov(spectra[Z == ci, :], rowvar=False)
m = np.mean(spectra[Z == ci, :], axis=0)
C = np.cov(spectra[Z == ci, :], rowvar=False)
for i in range(nchan):
window = windows[window_idx[i]]
@@ -303,34 +286,33 @@ def surface_model(config, logger=None):
C[i, :] = 0
C[i, i] = ci + float(window['regularizer'])
else:
raise ValueError(
'I do not recognize the source ' + window['correlation'])
raise ValueError('I do not recognize the source ' + window['correlation'])
# Normalize the component spectrum if desired
if normalize == 'Euclidean':
z = s.sqrt(s.sum(pow(m[normind], 2)))
z = np.sqrt(np.sum(pow(m[normind], 2)))
elif normalize == 'RMS':
z = s.sqrt(s.mean(pow(m[normind], 2)))
z = np.sqrt(np.mean(pow(m[normind], 2)))
elif normalize == 'None':
z = 1.0
else:
raise ValueError(
'Unrecognized normalization: %s\n' % normalize)
raise ValueError('Unrecognized normalization: %s\n' % normalize)
m = m / z
C = C / (z ** 2)
model['means'].append(m)
model['covs'].append(C)
model['means'] = s.array(model['means'])
model['covs'] = s.array(model['covs'])
model['means'] = np.array(model['means'])
model['covs'] = np.array(model['covs'])
s.io.savemat(outfile, model)
logger.info("Saving results to " + outfile + "...")
def wv_band_ratio(data, fn_table, vza, sza, dem, aot, raa, intp_wvl, intp_fwhm, jday, month, idx, disable=False):
"""Band ratio water vapor retrieval.
"""
Band ratio water vapor retrieval.
:param data: image dataset
:param fn_table: path to radiative transfer LUT
@@ -348,6 +330,7 @@ def wv_band_ratio(data, fn_table, vza, sza, dem, aot, raa, intp_wvl, intp_fwhm,
:param disable: if True, progressbar during retrieval is disabled; default: False
:return: water vapor image
"""
cnt_land = len(np.ndarray.flatten(data[:, :, idx[1]]))
num_bd = 2
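For intuition, a generic continuum-interpolated band ratio; this is a hedged sketch of the general technique, not SICOR's exact formulation, and all names and weights are hypothetical:

import numpy as np

def band_ratio(l_meas, l_win1, l_win2, w1=0.5, w2=0.5):
    # interpolate the continuum from two window channels and ratio the
    # absorption-band radiance against it
    continuum = w1 * l_win1 + w2 * l_win2
    return l_meas / continuum  # deeper absorption -> smaller ratio -> more water vapor

ratio = band_ratio(np.array([8.0]), np.array([10.0]), np.array([9.0]))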
@@ -442,7 +425,7 @@ def wv_band_ratio(data, fn_table, vza, sza, dem, aot, raa, intp_wvl, intp_fwhm,
dem_fac_pix = dem_fac[ind]
if ll_cal[ind] < rhos[0]:
ll_cal[ind] = rhos[0]
ll_cal[ind] = rhos[0] + 0.001
ll_cal_low = ll_cal[ind] >= rhos
ll_cal_high = ll_cal[ind] <= rhos
idx_low = np.where(ll_cal_low)
......
@@ -24,16 +24,18 @@
# If not, see <https://www.gnu.org/licenses/>.
import scipy
from scipy.linalg import eigh, norm
from scipy.spatial import KDTree
from scipy.stats import linregress
from skimage.segmentation import slic
import numpy as np
from multiprocessing import Pool
def SLIC_segmentation(data_rad_all, n_pca, segs):
"""Perform superpixel segmentation by searching the image for a handful of geographically and compositionally
representative locations. Create spatially-contiguous clusters of similar radiances.
"""
Perform superpixel segmentation by searching the image for a handful of geographically and compositionally
representative locations. Create spatially-contiguous clusters of similar radiances.
:param data_rad_all: radiance data array with shape (rows, cols, bands)
:param n_pca: number of principal components
@@ -43,15 +45,15 @@ def SLIC_segmentation(data_rad_all, n_pca, segs):
# Change from BIP to a long list of spectra
rows, bands, cols = data_rad_all.shape[0], data_rad_all.shape[2], data_rad_all.shape[1]
X = scipy.asarray(data_rad_all, dtype=scipy.float32).reshape([rows * cols, bands])
X = np.asarray(data_rad_all, dtype=np.float32).reshape([rows * cols, bands])
# Excluding bad locations, calculate top PCA coefficients
mu = X.mean(axis=0)
C = scipy.cov(X, rowvar=False)
C = np.cov(X, rowvar=False)
[v, d] = eigh(C)
# Determine segmentation compactness scaling based on the top n_pca eigenvalues
cmpct = norm(scipy.sqrt(v[-n_pca:]))
cmpct = norm(np.sqrt(v[-n_pca:]))
# Project, redimension as an image with n_pca channels, and segment
X_pca = (X - mu) @ d[:, -n_pca:]
@@ -59,14 +61,15 @@ def SLIC_segmentation(data_rad_all, n_pca, segs):
labels = slic(X_pca, n_segments=segs, compactness=cmpct, max_iter=10, sigma=0, multichannel=True,
enforce_connectivity=True, min_size_factor=0.5, max_size_factor=3)
lbl = scipy.unique(labels)
lbl = np.unique(labels)
segs = len(lbl)
return X, segs, labels
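A hypothetical call matching the new retrieval options (n_pca=5, segs=200), with a stand-in radiance cube:

import numpy as np

data_rad_all = np.random.rand(100, 100, 50)  # stand-in (rows, cols, bands) cube
X, n_segs, labels = SLIC_segmentation(data_rad_all, n_pca=5, segs=200)
# X: flattened spectra; n_segs: superpixels actually produced; labels: id per pixel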
def empirical_line_solution(X, rdn_subset, data_l2a_seg, rows, cols, bands, segs, labels):
"""Apply empirical line solution to infer exact result of remaining spectra.
def empirical_line_solution(X, rdn_subset, data_l2a_seg, rows, cols, bands, segs, labels, processes):
"""
Apply empirical line solution to infer exact result of remaining spectra.
:param X: flattened radiance data array
:param rdn_subset: radiance data subset
@@ -76,51 +79,42 @@ def empirical_line_solution(X, rdn_subset, data_l2a_seg, rows, cols, bands, segs
:param bands: number of instrument bands
:param segs: number of segments
:param labels: array of segment labels for each pixel
:param processes: number of CPUs for multiprocessing
:return: extrapolated surface reflectance for each pixel
"""
# First set up a matrix of locations, one per spectrum in both
# the reference set and the image cube. We will simply use
# row and column indices for simplicity, rather than worrying
# about geographic or 3D locations
row_grid, col_grid = scipy.meshgrid(scipy.arange(rows), scipy.arange(cols))
locations = scipy.array([col_grid.flatten(), row_grid.flatten()]).T
locations_subset = scipy.zeros((segs, 2))
# First set up a matrix of locations, one per spectrum in both the reference set and the image cube.
# For simplicity, we use row and column indices rather than geographic or 3D locations.
row_grid, col_grid = np.meshgrid(np.arange(rows), np.arange(cols))
locations = np.array([col_grid.flatten(), row_grid.flatten()]).T
locations_subset = np.zeros((segs, 2))
for i in range(segs):
locations_subset[i, :] = locations[labels.flat == i, :].mean(axis=0)
tree = KDTree(locations_subset)
# Empirical line solution, hashing prior solutions as we go
k = 15
hash_table = {}
reflectance = scipy.zeros([rows * cols, bands])
# compute slopes and intercepts in the same shape as radiance and reflectance
unique_labels = np.unique(labels)
global _globs
_globs = dict(tree=tree, locs=locations_subset, k=15, rdn=rdn_subset, data=data_l2a_seg, bands=bands)
with Pool(processes=processes) as pool:
results = pool.map(_compute_coefficients_for_label, unique_labels)
for idx in range(rows * cols):
slopes, intercepts = np.empty([rows * cols, bands]), np.empty([rows * cols, bands])
for lbl, coeffs in zip(unique_labels, results):
slopes[labels.flat == lbl, :], intercepts[labels.flat == lbl, :] = coeffs
if idx % 20000 == 0:
print('extrapolating spectrum %i / %i' % (idx + 1, rows * cols))
# finally compute reflectance
reflectance = X * slopes + intercepts
# Look up the point in our KD Tree
label_idx = labels.flat[idx]
return reflectance
# Have we already calculated coefficients for this set of nearest-
# neighbor spectra?
if label_idx in hash_table:
b = hash_table[label_idx]
# If not, treat them as a local representative set of paired
# radiance/reflectance spectra to enable an empirical line solution
else:
dists, nn = tree.query(locations_subset[label_idx, :], k)
xv = rdn_subset[:, nn, :]
yv = data_l2a_seg[:, nn, :]
b = scipy.zeros((bands, 2))
for i in scipy.arange(bands):
b[i, 1], b[i, 0], q1, q2, q3 = linregress(xv[:, :, i], yv[:, :, i])
hash_table[label_idx] = b
_globs = dict()
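# NOTE: _globs is (re)populated inside empirical_line_solution() before the Pool
# is created, so forked worker processes inherit it; this assumes a fork-based
# start method (the default on Linux) rather than spawn.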
# Apply the solution coefficients to spectrum "idx" in the cube
A = scipy.array((scipy.ones(bands), X[idx, :]))
reflectance[idx, :] = (b.T * A).sum(axis=0)
return reflectance
def _compute_coefficients_for_label(label_idx):
nn = _globs["tree"].query(_globs["locs"][label_idx, :], _globs["k"])[1]
label_slopes, label_intercepts = zip(*[linregress(x=_globs["rdn"][:, nn, b],
y=_globs["data"][:, nn, b])[:2] for b in range(_globs["bands"])])
return label_slopes, label_intercepts
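The per-band regression in the helper reduces to an ordinary least-squares line per channel; a self-contained sketch with made-up radiance/reflectance pairs:

import numpy as np
from scipy.stats import linregress

rdn = np.array([10., 20., 30., 40.])      # radiances for one band (hypothetical)
rfl = np.array([0.11, 0.19, 0.32, 0.41])  # matching segment-mean reflectances
slope, intercept = linregress(x=rdn, y=rfl)[:2]
rfl_new = slope * 35.0 + intercept        # empirical line applied to a new pixel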
@@ -41,8 +41,11 @@
},
"retrieval": {
"fn_LUT": "",
"cpu":12,
"cpu":32,
"disable_progressbars": false,
"segmentation": true,
"n_pca": 5,
"segs": 200,
"state_params": ["water_vapor", "aot", "surface_reflectance"],
"atmosphere": {
"state_vector": {
@@ -63,7 +66,7 @@
}
},
"surface": {
"config": "/Users/niklasbohn/Desktop/sicor/sicor/options/surface_model_options.json"
"config": "/Users/niklasbohn/Desktop/SICOR_dev_ISOFIT_approach/sicor/sicor/options/surface_model_options.json"
},
"unknowns": {
"skyview": {
......
{
"sensor":{
"name": "EnMAP",
"fit":{
"wvl_center": [1051.3, 1062.6, 1074, 1085.4, 1096.9, 1108.5, 1120.1, 1131.8, 1143.5, 1155.3, 1167.1, 1179,
1190.9, 1202.8, 1214.8, 1226.7, 1238.7, 1250.7],
"fwhm": [13.52, 13.62, 13.72, 13.8, 13.88, 13.96, 14.03, 14.09, 14.15, 14.19, 14.24, 14.28, 14.31, 14.34,
14.36, 14.38, 14.39, 14.4],
"snr": [322.94254006, 313.45752342, 313.32464722, 306.19455173, 281.35641378, 227.9759974, 157.22390595,
148.93620623, 154.40197388, 182.60431866, 232.89644996, 250.1244036, 252.2267179, 272.16592448,
299.71964816, 316.11909184, 326.33202741, 312.28461288],
"idx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
}
},
"metadata":{
"jday": 16,
"month": 6,
"sza": 40,
"saa": 360,
"vza": 0,
"hsf": 0,
"aot": 0.2
},
"retrieval": {
"fn_LUT": "",
"cpu":12,
"state_vector": {
"cwv": {
"prior_mean": 2.5,
"use_prior_mean": false,
"ll": 0.0001,
"ul": 4.9999,
"prior_sigma": 1000
},
"ewt": {
"prior_mean": 0.02,
"use_prior_mean": false,
"ll": 0.0001,
"ul": 0.4999,
"prior_sigma": 0.01
},
"ice": {
"prior_mean": 0,
"use_prior_mean": true,
"ll": 0.0001,
"ul": 0.4999,
"prior_sigma": 3.1622776601683795e-05
},
"lai": {
"prior_mean": 5,
"use_prior_mean": true,
"ll": 0.0001,
"ul": 12.9999,
"prior_sigma": 1000
},
"dasf": {
"prior_mean": 0.25,
"use_prior_mean": true,
"ll": 0.0001,
"ul": 0.9999,
"prior_sigma": 1000
},
"p": {
"prior_mean": 0.7,
"use_prior_mean": true,
"ll": 0.4001,
"ul": 0.9999,
"prior_sigma": 1000
},
"p_max": {
"prior_mean": 0.88,
"use_prior_mean": true,
"ll": 0.0001,
"ul": 0.9999,
"prior_sigma": 1000
},
"k": {
"prior_mean": 0.7,
"use_prior_mean": true,
"ll": 0.0001,
"ul": 0.9999,
"prior_sigma": 1000
},
"b": {
"prior_mean": 0.75,
"use_prior_mean": true,
"ll": 0.0001,
"ul": 0.9999,
"prior_sigma": 1000
}
},
"unknowns": {
"Skyview": {
"sigma": 0.1
},
"H2O_ABSCO": {
"sigma": 0.01
},
"Liquid_ABSCO": {
"sigma": 2.43e-07
},
"Ice_ABSCO": {
"sigma": 2.39e-07
}
},
"inversion": {
"gnform": "n",
"full": false,